/* -*-C-*- */
#include "moar.h"
#include "jit/internal.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#include "dasm_x86.h"
#pragma GCC diagnostic pop
#pragma GCC diagnostic ignored "-Wunused-variable"

#ifdef _MSC_VER
#pragma warning( disable : 4129 )
#endif

/**
 * CONVENTIONS

 * Much of this file contains snippets of assembly code, which are concatenated
 * at runtime in order to form a single executable routine. It is essential for
 * the correctness of the result that each of the snippets behaves
 * nicely. Because you can't be expected to know what that is, it is documented
 * here.

 * REGISTERS:

 * Register and calling conventions differ between POSIX and windows
 * systems. The registers rax, rcx, rdx, r8, r9, r10 and r11 are caller-saved,
 * meaning that you are free to overwrite them, and functions you may call are
 * free to do the same. Hence you should save their values on the stack, if you
 * wish to keep them after calling. In contrast, rbx, rsp, rbp, and r12-r15 are
 * callee-saved, meaning that their values before entering and after returning
 * from a function must be the same. POSIX also makes rdi and rsi caller-saved,
 * windows makes them callee-saved. For this reason we avoid using them.  The
 * first 4 (windows) or 6 (POSIX) function call arguments are placed in
 * registers. These register sets are not the same between windows and POSIX,
 * but they're always caller-saved.

 * To deal with the ambiguity, register names have been aliased.

 * + RV stands for 'return value', and is aliased to rax
 * + TMP1-6 are the 6 shared caller-saved registers
 * + ARG1-4 are (different) aliases for argument registers
 * + TC, CU, WORK are registers that hold interpreter variables; these are callee-
 *   saved registers set up at entry and restored at exit
 * + TMP5 (r10) is also aliased as FUNCTION; it never conflicts with an argument
 *   register, and neither does TMP6.
 * + The suffixes d, w, and b stand for the 4, 2, and 1 byte-width value of the
 *   registers.

 * Note that the current convention for function calls is to load the function
 * pointer as a 64 bit value in a register from the machine code, and call on
 * that register. This is not ideal, but call doesn't natively take 64 bit
 * values, and otherwise it would be necessary to ensure that the function
 * lives within 32 bits distance of the calling code. Other methods are being
 * considered.

 * LABELS:

 * Don't use dynamic labels in this code, unless they have been passed to you
 * from outside. Dynamic labels need to be allocated and not conflict, hence
 * just picking one is typically unsafe. Local labels are usually safe.

 * WRITE BARRIERS:

 * Use of write barriers is tricky, because they may involve a function call, and
 * that may or may not mean you have to store your temporaries on the stack.
 * Hence, a write barrier (MVM_ASSIGN_REF) is split into two parts:

 * + check_wb (root, value, label)
 * + hit_wb (root, value)

 * You should have the label parameter point somewhere after hit_wb, and save
 * and restore your temporaries around the hit_wb.
 **/


/* DynASM setup: target architecture, the name of the generated action list
 * (returned by MVM_jit_actions), the two output sections used by the
 * snippets below, and the prefix of the generated global label names. */
|.arch x64
|.actionlist actions
|.section code, data
|.globals MVM_JIT_LABEL_

/* Fail the build when the number of generated global labels exceeds
 * MVM_JIT_MAX_GLOBALS. */
#if MVM_JIT_LABEL__MAX > MVM_JIT_MAX_GLOBALS
#error "Not enough space for labels"
#endif

/* type declarations
 *
 * Each `.type NAME, ctype` line allows an operand to be written as
 * NAME:reg->field or NAME:reg[index] (see e.g. FRAME:TMP6->outer and
 * OBJECTPTR:reg[idx] further down), with the offset derived from the C
 * type. */
|.type REGISTER, MVMRegister
|.type FRAME, MVMFrame
|.type ARGCTX, MVMArgProcContext
|.type CALLSITEPTR, MVMCallsite*
|.type CAPTURE, MVMCallCapture
|.type CAPTUREBODY, MVMCallCaptureBody
|.type ARGPROCCONTEXT, MVMArgProcContext
|.type STATICFRAME, MVMStaticFrame
|.type P6OPAQUE, MVMP6opaque
|.type P6OBODY, MVMP6opaqueBody
|.type MVMITER, MVMIter
|.type MVMINSTANCE, MVMInstance
|.type MVMACTIVEHANDLERS, MVMActiveHandler
|.type OBJECT, MVMObject
|.type STOOGE, MVMObjectStooge
|.type VMARRAY, MVMArray
|.type COLLECTABLE, MVMCollectable
|.type STABLE, MVMSTable
|.type REPR, MVMREPROps
|.type STRING, MVMString
|.type OBJECTPTR, MVMObject*
|.type CONTEXT, MVMContext
|.type CONTAINERSPEC, MVMContainerSpec
|.type STORAGESPEC, MVMStorageSpec
|.type HLLCONFIG, MVMHLLConfig;
|.type SCREF, MVMSerializationContext
|.type SCREFBODY, MVMSerializationContextBody
|.type NFGSYNTH, MVMNFGSynthetic
|.type CODE, MVMCode
|.type BIGINTBODY, MVMP6bigintBody
|.type U8, MVMuint8
|.type U16, MVMuint16
|.type U32, MVMuint32
|.type U64, MVMuint64
|.type MPINT, mp_int


/* Static allocation of relevant types to registers. I pick
 * callee-save registers for efficiency. It is likely we'll be calling
 * quite a few C functions, and this saves us the trouble of storing
 * them. Moreover, C compilers preferentially do not use callee-saved
 * registers, and so in most cases, these won't be touched at all.
 *
 * With a third argument the type is bound to a fixed register, so that
 * TC->field, WORK[n] and CU->field can be written without naming the
 * register. */
|.type TC, MVMThreadContext, r14
/* Alternative base pointer. I'll be using this often, so picking rbx
 * here rather than the extended registers will lead to smaller
 * bytecode */
|.type WORK, MVMRegister, rbx
|.type CU, MVMCompUnit, r13




/* Report whether the JIT can be used by this build: 0 when compiled for
 * i386, 1 otherwise. */
MVMint32 MVM_jit_support(void) {
    /* Usually this file is only compiled on an amd64 platform; but when
       building 'fat' or 'universal' binaries it may also be compiled for
       another platform. In that case this runtime check is what disables
       the JIT. */
#if defined(__i386__)
    const MVMint32 supported = 0;
#else
    const MVMint32 supported = 1;
#endif
    return supported;
}

const unsigned char * MVM_jit_actions(void) {
    return actions;
}

/* C Call argument registers
 *
 * ARG1-ARG4 are always registers. ARG5 and ARG6 are registers on POSIX but
 * stack slots (memory operands relative to rsp) on WIN32, so they cannot be
 * used wherever a register operand is required. */
|.if WIN32
|.define ARG1, rcx
|.define ARG2, rdx
|.define ARG3, r8
|.define ARG4, r9
|.define ARG5, qword [rsp+0x20]
|.define ARG6, qword [rsp+0x28]
|.else
|.define ARG1, rdi
|.define ARG2, rsi
|.define ARG3, rdx
|.define ARG4, rcx
|.define ARG5, r8
|.define ARG6, r9
|.endif

/* C call argument registers for floating point
 *
 * Only ARG1F-ARG4F exist on WIN32; ARG5F-ARG8F are defined for POSIX only. */
|.if WIN32
|.define ARG1F, xmm0
|.define ARG2F, xmm1
|.define ARG3F, xmm2
|.define ARG4F, xmm3
|.else
|.define ARG1F, xmm0
|.define ARG2F, xmm1
|.define ARG3F, xmm2
|.define ARG4F, xmm3
|.define ARG5F, xmm4
|.define ARG6F, xmm5
|.define ARG7F, xmm6
|.define ARG8F, xmm7
|.endif

/* Special register for the function to be invoked
 * (chosen because it isn't involved in argument passing
 *  and volatile). Note that this is the same register as TMP5. */
|.define FUNCTION, r10
/* all-purpose temporary registers
 *
 * TMP1-TMP4 share registers with the argument aliases: on WIN32 they are
 * ARG1-ARG4 in order, on POSIX TMP1 is ARG4, TMP2 is ARG3, TMP3 is ARG5 and
 * TMP4 is ARG6. Take care when setting up arguments from values held in
 * these temporaries. TMP5 and TMP6 never overlap an argument register. */
|.define TMP1, rcx
|.define TMP2, rdx
|.define TMP3, r8
|.define TMP4, r9
|.define TMP5, r10
|.define TMP6, r11
/* same, but 32 bits wide */
|.define TMP1d, ecx
|.define TMP2d, edx
|.define TMP3d, r8d
|.define TMP4d, r9d
|.define TMP5d, r10d
|.define TMP6d, r11d
/* and 16 bits wide */
|.define TMP1w, cx
|.define TMP2w, dx
|.define TMP3w, r8w
|.define TMP4w, r9w
|.define TMP5w, r10w
|.define TMP6w, r11w
/* and 8 bits for good measure */
|.define TMP1b, cl
|.define TMP2b, dl
|.define TMP3b, r8b
|.define TMP4b, r9b
|.define TMP5b, r10b
|.define TMP6b, r11b


/* return value: RV/RVd for integer and pointer results, RVF for floating
 * point results */
|.define RV, rax
|.define RVd, eax
|.define RVF, xmm0

/* callp funcptr: call the C function at address `funcptr`.
 *
 * The 64-bit address is written to the data section as two 32-bit halves
 * (low half first) behind local label 5; the code section then performs an
 * indirect call through that slot. Every expansion defines local label 5
 * again, so a backward reference `<5` placed after a callp refers to this
 * data slot. As with any C call, caller-saved registers (including RV and
 * TMP1-6) may be overwritten by the callee. */
|.macro callp, funcptr
|.data
|5:
|.dword (MVMuint32)((uintptr_t)(funcptr)), (MVMuint32)((uintptr_t)(funcptr) >> 32);
|.code
| call qword [<5];
|.endmacro


/* check_wb root, ref, lbl: first half of a write barrier.
 *
 * Jumps to `lbl` when the barrier does not have to be hit, that is when
 *   - `root` does not have MVM_CF_SECOND_GEN set, or
 *   - `ref` is zero, or
 *   - `ref` itself has MVM_CF_SECOND_GEN set.
 * Falls through otherwise, in which case hit_wb should follow. Both `root`
 * and `ref` must be registers holding pointers; only the flags register is
 * modified. */
|.macro check_wb, root, ref, lbl;
| test word COLLECTABLE:root->flags, MVM_CF_SECOND_GEN;
| jz lbl;
| test ref, ref;
| jz lbl;
| test word COLLECTABLE:ref->flags, MVM_CF_SECOND_GEN;
| jnz lbl;
|.endmacro;

/* hit_wb obj, value: second half of a write barrier; emits the call
 * MVM_gc_write_barrier_hit_by(TC, obj, value).
 *
 * The arguments are loaded in the order ARG3, ARG2, ARG1. `value` is read
 * first and may therefore live anywhere, but `obj` is read after ARG3 has
 * been written and so must not be held in the register aliased by ARG3.
 * Since this is a C call, caller-saved registers (including TMP1-6) must be
 * saved around it if their values are needed afterwards. */
|.macro hit_wb, obj, value
| mov ARG3, value
| mov ARG2, obj;
| mov ARG1, TC;
| callp &MVM_gc_write_barrier_hit_by;
|.endmacro

/* get_spesh_slot reg, idx: load
 * tc->cur_frame->effective_spesh_slots[idx] into `reg`. The register is
 * also used for the intermediate pointers, so no other register is
 * touched. */
|.macro get_spesh_slot, reg, idx;
| mov reg, TC->cur_frame;
| mov reg, FRAME:reg->effective_spesh_slots;
| mov reg, OBJECTPTR:reg[idx];
|.endmacro

/* get_vmnull reg: load tc->instance->VMNull into `reg`. Flags and other
 * registers are left untouched. */
|.macro get_vmnull, reg
| mov reg, TC->instance;
| mov reg, MVMINSTANCE:reg->VMNull;
|.endmacro

/* get_cur_op reg: load the pointer stored in TC->interp_cur_op and
 * dereference it, leaving the pointed-to value in `reg`. */
|.macro get_cur_op, reg
| mov reg, TC->interp_cur_op
| mov reg, [reg]
|.endmacro

/* get_string reg, idx: load CU->body.strings[idx] into `reg`.
 *
 * The `||` line is C code that runs while the snippet is being emitted (not
 * as part of the generated code): it calls MVM_cu_ensure_string_decoded for
 * string `idx` of the compilation unit of the graph being compiled. The
 * macro can therefore only be expanded where `tc` and `jg` are in scope. */
|.macro get_string, reg, idx
|| MVM_cu_ensure_string_decoded(tc, jg->sg->sf->body.cu, idx);
| mov reg, CU->body.strings;
| mov reg, OBJECTPTR:reg[idx];
|.endmacro

/* test_type_object reg: test the MVM_CF_TYPE_OBJECT flag of the object
 * pointed to by `reg`. Afterwards `jnz` is taken when the flag is set and
 * `jz` when it is clear. Only the flags register is modified. */
|.macro test_type_object, reg
| test word OBJECT:reg->header.flags, MVM_CF_TYPE_OBJECT
|.endmacro

/* gc_sync_point: when TC->gc_status is non-zero, call
 * MVM_gc_enter_from_interrupt(TC); otherwise skip the call. Defines local
 * label 1. When the call is made, caller-saved registers may be
 * overwritten. */
|.macro gc_sync_point
| cmp qword TC->gc_status, 0;
| je >1;
| mov ARG1, TC;
| callp &MVM_gc_enter_from_interrupt;
|1:
|.endmacro

/* throw_adhoc msg: emit the call MVM_exception_throw_adhoc(TC, msg). The
 * address of `msg` is embedded in the generated code as a 64-bit immediate,
 * so the string has to stay valid for as long as the code may run. */
|.macro throw_adhoc, msg
| mov ARG1, TC;
| mov64 ARG2, (uintptr_t)(msg);
| callp &MVM_exception_throw_adhoc;
|.endmacro

/* get_stable out, in: load the `st` pointer of the object pointed to by
 * `in` into `out`. `out` and `in` may be the same register. */
|.macro get_stable, out, in
| mov out, aword OBJECT:in->st;
|.endmacro

/* get_repr out, in: load in->st->REPR into `out`, using `out` for the
 * intermediate st pointer. */
|.macro get_repr, out, in
| get_stable out, in;
| mov out, aword STABLE:out->REPR;
|.endmacro

/* cmp_repr_id obj, tmp, id: compare obj->st->REPR->ID (a 32-bit value) with
 * the constant `id`, so that `je` is taken when they are equal. `tmp` is
 * overwritten with the REPR pointer; `obj` is preserved as long as it is a
 * different register. */
|.macro cmp_repr_id, obj, tmp, id
| get_repr tmp, obj;
| cmp dword REPR:tmp->ID, id;
|.endmacro

/* 32-bit stack slot at [rbp-0x20], directly below the three slots in which
 * the prologue saves TC, CU and WORK.
 * NOTE(review): what is stored here is not visible in this part of the
 * file -- confirm against the code that uses FRAME_NR. */
|.define FRAME_NR, dword [rbp-0x20]

/* A function prologue is always the same in x86 / x64, because
 * we do not provide variable arguments, instead arguments are provided
 * via a frame. All JIT entry points receive a prologue.
 *
 * The emitted code expects the thread context in ARG1, the compilation unit
 * in ARG2 and the address at which to continue in ARG3. It overwrites TMP6
 * and, unless jg->no_trampoline is set, rax. */

void MVM_jit_emit_prologue(MVMThreadContext *tc, MVMJitCompiler *compiler,
                           MVMJitGraph *jg) {
    |.code
    /* Setup stack */
    | push rbp;
    | mov rbp, rsp;
    /* allocate stack space: 0x100 bytes = 256 bytes
     *
     * layout: [ a: 0x20 | b: 0x40 | c: 0xa0 | d: 0x20 ]
     * a: space for 4 callee-save registers
     * b: small scratch space
     * c: space for stack arguments to c calls
     * d: reserve space for GPR registers to c calls (win64) or more space for
     * stack arguments (posix)
     *
     * NOTE(review): the four sizes listed above add up to 0x120, which is
     * more than the 0x100 bytes allocated below -- confirm which of the
     * figures is out of date. */
    | sub rsp, 0x100;
    /* save callee-save registers; the epilogue restores them from the same
     * slots */
    | mov [rbp-0x8],  TC;
    | mov [rbp-0x10], CU;
    | mov [rbp-0x18], WORK;
    /* setup special frame variables: TC and CU come from the first two
     * arguments, WORK is tc->cur_frame->work */
    | mov TC,   ARG1;
    | mov CU,   ARG2;
    | mov TMP6, TC->cur_frame;
    | mov WORK, FRAME:TMP6->work;
    /* If in the future we call a function, the return address into the JIT
     * frame will be stored in this position. (That is the slot just below
     * the current rsp, which is where a call instruction pushes its return
     * address.) */
    if (!jg->no_trampoline) {
        | lea rax, [rsp-0x8];
        | mov aword TC->jit_return_address, rax;
    }
    /* ARG3 contains our 'entry label' */
    | jmp ARG3
}

/* And a function epilogue is also always the same.
 *
 * The emitted code starts at the global label `exit`, undoes what the
 * prologue set up and returns to the caller of the JIT code. It does not
 * write to RV, so whatever value is in rax on arrival at `exit` is what the
 * caller receives. */
void MVM_jit_emit_epilogue(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg) {
    | ->exit:
    /* clear the return address, so that we know there's no longer a JIT frame
     * on the stack */
    if (!jg->no_trampoline) {
        | mov aword TC->jit_return_address, 0;
    }
    /* restore callee-save registers from the slots written by the prologue.
     * TC is still needed for the store above, so this has to come after
     * it. */
    | mov TC, [rbp-0x8];
    | mov CU, [rbp-0x10];
    | mov WORK, [rbp-0x18];
    /* Restore stack: drop the frame allocated by the prologue and return */
    | mov rsp, rbp;
    | pop rbp;
    | ret;
}

/* Try to embed the address of `obj` directly in the generated code.
 *
 * When the header of `obj` has MVM_CF_SECOND_GEN set, code is emitted that
 * loads the address as a 64-bit immediate and stores it in WORK[reg] (using
 * TMP1 as scratch), and 1 is returned. Otherwise nothing is emitted and 0 is
 * returned, leaving it to the caller to emit a regular load.
 * (Presumably this is safe because such objects are no longer moved by the
 * GC -- confirm.) */
static MVMuint64 try_emit_gen2_ref(MVMThreadContext *tc, MVMJitCompiler *compiler,
                                   MVMJitGraph *jg, MVMObject *obj, MVMint16 reg) {
    if (obj->header.flags & MVM_CF_SECOND_GEN) {
        | mov64 TMP1, (uintptr_t)obj;
        | mov WORK[reg], TMP1;
        return 1;
    }
    return 0;
}

/* Tell whether `number` lies within the range of a signed 32-bit integer,
 * which is what is required to use it as a 32-bit immediate operand.
 * Returns 1 when it does and 0 when it does not. */
static MVMint64 fits_in_32_bit(MVMint64 number) {
    if (number < INT32_MIN)
        return 0;
    if (number > INT32_MAX)
        return 0;
    return 1;
}

/* Emit an inlined object allocation for `ins`.
 *
 * Operand 1 of the instruction is the allocation size and operand 2 the
 * index of the spesh slot whose value becomes the `st` of the new object.
 * The emitted code calls MVM_gc_allocate_nursery(TC, size) and fills in the
 * st, header.size and header.owner fields. The new object is left in RV; it
 * is up to the caller to emit the store into a work register. TMP1 is used
 * as scratch after the call. */
static void emit_fastcreate(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                            MVMSpeshIns *ins) {
    MVMuint16 alloc_size = ins->operands[1].lit_i16;
    MVMint16  st_slot    = ins->operands[2].lit_i16;
    /* RV = MVM_gc_allocate_nursery(TC, alloc_size) */
    | mov ARG1, TC;
    | mov ARG2, alloc_size;
    | callp &MVM_gc_allocate_nursery;
    /* RV->st = spesh slot value; a pointer-sized store */
    | get_spesh_slot TMP1, st_slot;
    | mov aword OBJECT:RV->st, TMP1;
    /* RV->header.size = alloc_size; a 16 bit store */
    | mov word OBJECT:RV->header.size, alloc_size;
    /* RV->header.owner = TC->thread_id; both are accessed as 32 bit */
    | mov TMP1d, dword TC->thread_id;
    | mov dword OBJECT:RV->header.owner, TMP1d;
}

/* compile per instruction, can't really do any better yet */
void MVM_jit_emit_primitive(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                            MVMJitPrimitive * prim) {
    MVMSpeshIns *ins = prim->ins;
    MVMuint16 op = ins->info->opcode;
    /* Quite a few of these opcodes are copies. Ultimately, I want to
     * move copies to their own node (MVMJitCopy or such), and reduce
     * the number of copies (and thereby increase the efficiency), but
     * currently that isn't really feasible. */
    switch (op) {
    case MVM_OP_const_i64_16:
    case MVM_OP_const_i64_32: {
        MVMint32 reg = ins->operands[0].reg.orig;
        /* Upgrade to 64 bit */
        MVMint64 val = (op == MVM_OP_const_i64_16 ? (MVMint64)ins->operands[1].lit_i16 :
                        (MVMint64)ins->operands[1].lit_i32);
        | mov qword WORK[reg], val;
        break;
    }
    case MVM_OP_const_i64: {
        MVMint32 reg = ins->operands[0].reg.orig;
        MVMint64 val = ins->operands[1].lit_i64;
        | mov64 TMP1, val;
        | mov WORK[reg], TMP1;
        break;
    }
    case MVM_OP_const_n64: {
        MVMint16 reg = ins->operands[0].reg.orig;
        MVMint64 valbytes = ins->operands[1].lit_i64;
        | mov64 TMP1, valbytes;
        | mov WORK[reg], TMP1;
        break;
    }
    case MVM_OP_inf:
    case MVM_OP_neginf:
    case MVM_OP_nan: {
        MVMint16 reg = ins->operands[0].reg.orig;
        MVMRegister tmp;
        if (op == MVM_OP_nan)
            tmp.n64 = MVM_num_nan(tc);
        else if (op == MVM_OP_inf)
            tmp.n64 = MVM_num_posinf(tc);
        else if (op == MVM_OP_neginf)
            tmp.n64 = MVM_num_neginf(tc);
        | mov64 TMP1, tmp.i64;
        | mov WORK[reg], TMP1;
        break;
    }
    case MVM_OP_const_s: {
         MVMint16 reg = ins->operands[0].reg.orig;
         MVMuint32 idx = ins->operands[1].lit_str_idx;
         MVMStaticFrame *sf = jg->sg->sf;
         MVMString * s = MVM_cu_string(tc, sf->body.cu, idx);
         if (!try_emit_gen2_ref(tc, compiler, jg, (MVMObject*)s, reg)) {
             | get_string TMP1, idx;
             | mov WORK[reg], TMP1;
         }
         break;
    }
    case MVM_OP_null: {
        MVMint16 reg = ins->operands[0].reg.orig;
        | get_vmnull TMP1;
        | mov WORK[reg], TMP1;
        break;
    }
    case MVM_OP_getwhat:
    case MVM_OP_getwho: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, WORK[obj];
        | mov TMP1, OBJECT:TMP1->st;
        if (op == MVM_OP_getwho) {
            | mov TMP1, STABLE:TMP1->WHO;
            | get_vmnull TMP2;
            | test TMP1, TMP1;
            | cmovz TMP1, TMP2;
        } else {
            | mov TMP1, STABLE:TMP1->WHAT;
        }
        | mov WORK[dst], TMP1;
        break;
    }
    case MVM_OP_getlex:
    case MVM_OP_sp_getlex_o:
    case MVM_OP_sp_getlex_ins: {
        MVMuint16 *lexical_types;
        MVMStaticFrame * sf = jg->sg->sf;
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 idx = ins->operands[1].lex.idx;
        MVMint16 out = ins->operands[1].lex.outers;
        MVMint16 i;
        | mov TMP6, TC->cur_frame;
        for (i = 0; i < out; i++) {
            /* I'm going to skip compiling the check whether the outer
             * node really exists, because if the code has run N times
             * correctly, then the outer frame must have existed then,
             * and since this chain is static, it should still exist
             * now.  If it doesn't exist, that means we crash.
             *
             * NB: inlining /might/ make this all wrong! But, if that
             * happens, the interpreter will panic even without JIT */
            | mov TMP6, FRAME:TMP6->outer;
            sf = sf->body.outer;
        }
        /* get array of lexicals */
        | mov TMP5, FRAME:TMP6->env;
        /* read value */
        | mov TMP5, REGISTER:TMP5[idx];
        /* it seems that if at runtime, if the outer frame has been inlined,
         * this /could/ be wrong. But if that is so, the interpreted instruction
         * would also be wrong, because it'd refer to the wrong lexical. */
        lexical_types = (!out && jg->sg->lexical_types ?
                         jg->sg->lexical_types :
                         sf->body.lexical_types);
        if (lexical_types[idx] == MVM_reg_obj) {
            /* if it is zero, check if we need to auto-vivify */
            | test TMP5, TMP5;
            | jnz >1;
            /* setup args */
            | mov ARG1, TC;
            | mov ARG2, TMP6;
            | mov ARG3, idx;
            | callp &MVM_frame_vivify_lexical;
            /* use return value for the result */
            | mov TMP5, RV;
            |1:
        }
        /* store the value */
        | mov WORK[dst], TMP5;
        break;
    }
    case MVM_OP_sp_getlexvia_o:
    case MVM_OP_sp_getlexvia_ins: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 idx = ins->operands[1].lit_ui16;
        MVMint16 out = ins->operands[2].lit_ui16;
        MVMint16 via = ins->operands[3].reg.orig;
        MVMint16 i;
        /* Resolve the frame. */
        | mov TMP6, WORK[via];
        | mov TMP6, CODE:TMP6->body.outer;
        for (i = 1; i < out; i++) /* From 1 as we are already at outer */
            | mov TMP6, FRAME:TMP6->outer;
        /* get array of lexicals */
        | mov TMP5, FRAME:TMP6->env;
        /* read value */
        | mov TMP5, REGISTER:TMP5[idx];
        if (op == MVM_OP_sp_getlexvia_o) {
            /* if it is zero, check if we need to auto-vivify */
            | test TMP5, TMP5;
            | jnz >1;
            /* setup args */
            | mov ARG1, TC;
            | mov ARG2, TMP6;
            | mov ARG3, idx;
            | callp &MVM_frame_vivify_lexical;
            /* use return value for the result */
            | mov TMP5, RV;
            |1:
        }
        /* store the value */
        | mov WORK[dst], TMP5;
        break;
    }
    case MVM_OP_sp_bindlexvia_os:
    case MVM_OP_sp_bindlexvia_in: {
        MVMint16 idx = ins->operands[0].lit_ui16;
        MVMint16 out = ins->operands[1].lit_ui16;
        MVMint16 via = ins->operands[2].reg.orig;
        MVMint16 src = ins->operands[3].reg.orig;
        MVMint16 i;
        /* Resolve the frame. */
        | mov TMP1, WORK[via];
        | mov TMP1, CODE:TMP1->body.outer;
        for (i = 1; i < out; i++) /* From 1 as we are already at outer */
            | mov TMP1, FRAME:TMP1->outer;
        /* get array of lexicals */
        | mov TMP2, FRAME:TMP1->env;
        /* bind the lexical, and write barrier if needed */
        | mov TMP3, WORK[src]
        | mov REGISTER:TMP2[idx], TMP3;
        if (op == MVM_OP_sp_bindlexvia_os) {
            | check_wb TMP1, TMP3, >2;
            | hit_wb TMP1, TMP3;
            |2:
        }
        break;
    }
    case MVM_OP_getlexreldyn: {
        MVMint16 dst  = ins->operands[0].reg.orig;
        MVMint16 ctx  = ins->operands[1].reg.orig;
        MVMint16 name = ins->operands[2].reg.orig;
        | mov TMP5, aword WORK[ctx];
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_MVMContext;
        | je >1;
        | test_type_object TMP5;
        | jz >1;
        | throw_adhoc "getlexreldyn needs a context"
        |1:
        | mov ARG2, TMP5
        | mov ARG1, TC
        | mov ARG3, aword WORK[name];
        | callp &MVM_context_dynamic_lookup;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_getlex_no:
    case MVM_OP_sp_getlex_no: {
        MVMint16  dst = ins->operands[0].reg.orig;
        MVMuint32 idx = ins->operands[1].lit_str_idx;
        | mov ARG1, TC;
        | get_string ARG2, idx;
        | mov ARG3, MVM_reg_obj;
        | callp &MVM_frame_find_lexical_by_name;
        | test RV, RV;
        | jz >1;
        | mov RV, [RV];
        | mov WORK[dst], RV;
        | jmp >2;
        |1:
        | get_vmnull TMP3;
        | mov WORK[dst], TMP3;
        |2:
        break;
    }
    case MVM_OP_bindlex:
    case MVM_OP_sp_bindlex_os:
    case MVM_OP_sp_bindlex_in: {
        MVMuint16 *lexical_types;
        MVMStaticFrame *sf = jg->sg->sf;
        MVMint16 idx = ins->operands[0].lex.idx;
        MVMint16 out = ins->operands[0].lex.outers;
        MVMint16 src = ins->operands[1].reg.orig;
        MVMint16 i;
        | mov TMP1, TC->cur_frame;
        for (i = 0; i < out; i++) {
            | mov TMP1, FRAME:TMP1->outer;
            sf = sf->body.outer;
        }
        lexical_types = (!out && jg->sg->lexical_types ?
                         jg->sg->lexical_types :
                         sf->body.lexical_types);
        | mov TMP2, FRAME:TMP1->env;
        | mov TMP3, WORK[src];
        | mov REGISTER:TMP2[idx], TMP3;
        if (lexical_types[idx] == MVM_reg_obj ||
            lexical_types[idx] == MVM_reg_str) {
            | check_wb TMP1, TMP3, >2;
            | hit_wb TMP1, TMP3;
            |2:
        }
        break;
    }
    case MVM_OP_lexprimspec: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 ctx = ins->operands[1].reg.orig;
        MVMint16 name = ins->operands[2].reg.orig;
        | mov TMP1, WORK[ctx];
        | cmp_repr_id, TMP1, TMP2, MVM_REPR_ID_MVMContext;
        | jne >1;
        | test_type_object TMP1;
        | jz >2;
        |1:
        | throw_adhoc "lexprimspec needs a context";
        |2:
        | mov ARG2, CONTEXT:TMP1->body.context;
        | mov ARG3, WORK[name];
        | mov ARG1, TC;
        | callp &MVM_frame_lexical_primspec;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_getarg_o:
    case MVM_OP_getarg_n:
    case MVM_OP_getarg_s:
    case MVM_OP_getarg_i: {
        MVMuint16 reg = ins->operands[0].reg.orig;
        MVMuint16 idx = ins->operands[1].reg.orig;
        | mov TMP1, TC->cur_frame;
        | mov TMP1, FRAME:TMP1->args;
        | mov TMP1, REGISTER:TMP1[idx];
        | mov WORK[reg], TMP1;
        break;
    }
    case MVM_OP_sp_getarg_o:
    case MVM_OP_sp_getarg_n:
    case MVM_OP_sp_getarg_s:
    case MVM_OP_sp_getarg_i: {
        MVMint32 reg = ins->operands[0].reg.orig;
        MVMuint16 idx = ins->operands[1].callsite_idx;
        | mov TMP1, TC->cur_frame;
        | mov TMP1, FRAME:TMP1->params.args;
        | mov TMP1, REGISTER:TMP1[idx];
        | mov WORK[reg], TMP1;
        break;
    }
    case MVM_OP_sp_p6oget_i:
    case MVM_OP_sp_p6oget_i32:
    case MVM_OP_sp_p6oget_n:
    case MVM_OP_sp_p6oget_s:
    case MVM_OP_sp_p6oget_bi:
    case MVM_OP_sp_p6oget_o:
    case MVM_OP_sp_p6ogetvc_o:
    case MVM_OP_sp_p6ogetvt_o:
    case MVM_OP_sp_getvc_o:
    case MVM_OP_sp_getvt_o: {
        MVMint16 dst    = ins->operands[0].reg.orig;
        MVMint16 obj    = ins->operands[1].reg.orig;
        MVMint16 body = offsetof(MVMP6opaque, body);
        MVMint16 offset = ins->operands[2].lit_i16;
        /* load address and object */
        | mov TMP1, WORK[obj];
        if (op == MVM_OP_sp_getvc_o || op == MVM_OP_sp_getvt_o) {
            | lea TMP2, [TMP1 + offset];
            body = 0; /* Offset is already from the start of the object */
        }
        else {
            | lea TMP2, [TMP1 + (offset + body)];
            | mov TMP4, P6OPAQUE:TMP1->body.replaced;
            | lea TMP5, [TMP4 + offset];
            | test TMP4, TMP4;
            | cmovnz TMP2, TMP5;
        }
        /* TMP2 now contains address of item */
        if (op == MVM_OP_sp_p6oget_o) {
            | mov TMP3, [TMP2];
            | test TMP3, TMP3;
            /* Check if object doesn't point to NULL */
            | jnz >3;
            /* Otherwise load VMNull */
            | get_vmnull TMP3;
            |3:
        } else if (op == MVM_OP_sp_p6ogetvt_o || op == MVM_OP_sp_getvt_o) {
            /* vivify as type object */
            MVMint16 spesh_idx = ins->operands[3].lit_i16;
            MVMCollectable *spesh_value = jg->sg->spesh_slots[spesh_idx];
            | mov TMP3, [TMP2];
            /* check for null */
            | test TMP3, TMP3;
            | jnz >4;
            /* if null, vivify as type object from spesh slot */
            | get_spesh_slot TMP3, spesh_idx;
            if (!(spesh_value->flags & MVM_CF_SECOND_GEN)) {
                /* need to hit write barrier? */
                | check_wb TMP1, TMP3, >3;
                | mov qword [rbp-0x28], TMP2; // address
                | mov qword [rbp-0x30], TMP3; // value
                | hit_wb WORK[obj], TMP3; // write barrier for header
                | mov TMP3, qword [rbp-0x30];
                | mov TMP2, qword [rbp-0x28];
                |3:
            }
            /* store vivified type value in memory location */
            | mov qword [TMP2], TMP3;
            |4:
        } else if (op == MVM_OP_sp_p6ogetvc_o || op == MVM_OP_sp_getvc_o) {
            MVMint16 spesh_idx = ins->operands[3].lit_i16;
            | mov TMP3, [TMP2];
            | test TMP3, TMP3;
            | jnz >4;
            /* vivify as clone */
            | mov ARG1, TC;
            | get_spesh_slot ARG2, spesh_idx;
            | callp &MVM_repr_clone;
            | mov TMP3, RV;
            /* reload object and address */
            | mov TMP1, WORK[obj];
            | lea TMP2, [TMP1 + (offset + body)];
            | mov TMP4, P6OPAQUE:TMP1->body.replaced;
            | lea TMP5, [TMP4 + offset];
            | test TMP4, TMP4;
            | cmovnz TMP2, TMP5;
            /* assign with write barrier */
            | check_wb TMP1, TMP3, >3;
            | mov qword [rbp-0x28], TMP2; // address
            | mov qword [rbp-0x30], TMP3; // value
            | hit_wb WORK[obj], TMP3; // write barrier for header
            | mov TMP3, qword [rbp-0x30];
            | mov TMP2, qword [rbp-0x28];
            |3:
            | mov qword [TMP2], TMP3;
            /* done */
            |4:
        } else if (op == MVM_OP_sp_p6oget_bi) {
            /* Big integer case */
            | mov TMP4d, BIGINTBODY:TMP2->u.smallint.flag;
            | cmp TMP4d, dword MVM_BIGINT_32_FLAG;
            | jne >5;
            | movsxd TMP3, dword BIGINTBODY:TMP2->u.smallint.value;
            | jmp >6;
            | 5:
            | mov ARG1, TC;
            | mov ARG2, TMP2;
            | callp &MVM_p6bigint_get_int64;
            | mov TMP3, RV;
            | 6:
        } else if (op == MVM_OP_sp_p6oget_i32) {
            | movsxd TMP3, dword [TMP2];
        }
        else {
            /* the regular case */
            | mov TMP3, [TMP2];
        }
        /* store in local register */
        | mov WORK[dst], TMP3;
        break;
    }
    case MVM_OP_sp_bind_i64:
    case MVM_OP_sp_bind_i32:
    case MVM_OP_sp_bind_n:
    case MVM_OP_sp_bind_s:
    case MVM_OP_sp_bind_s_nowb:
    case MVM_OP_sp_bind_o: {
        MVMint16 obj    = ins->operands[0].reg.orig;
        MVMint16 offset = ins->operands[1].lit_i16;
        MVMint16 val    = ins->operands[2].reg.orig;
        | mov TMP1, WORK[obj];            // object
        | mov TMP2, WORK[val];            // value
        if (op == MVM_OP_sp_bind_o || op == MVM_OP_sp_bind_s) {
            /* check if we should hit write barrier */
            | check_wb TMP1, TMP2, >2;
            /* note: it is uneccesary to store pointers, because they
               can just be loaded from memory */
            | hit_wb WORK[obj], WORK[val];
            | mov TMP1, aword WORK[obj]; // reload object
            | mov TMP2, aword WORK[val]; // reload value
            |2: // done
        }
        if (op == MVM_OP_sp_bind_i32) {
            | mov dword [TMP1+offset], TMP2d; // store value into body
        }
        else {
            | mov qword [TMP1+offset], TMP2; // store value into body
        }
        break;
    }
    case MVM_OP_sp_get_i64:
    case MVM_OP_sp_get_n:
    case MVM_OP_sp_get_s: {
        MVMint16 dst    = ins->operands[0].reg.orig;
        MVMint16 obj    = ins->operands[1].reg.orig;
        MVMint16 offset = ins->operands[2].lit_i16;
        | mov TMP1, WORK[obj];             // object
        | mov TMP2, qword [TMP1+offset];   // get value from body
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_sp_get_o: {
        MVMint16 dst    = ins->operands[0].reg.orig;
        MVMint16 obj    = ins->operands[1].reg.orig;
        MVMint16 offset = ins->operands[2].lit_i16;
        | mov TMP1, WORK[obj];             // object
        | mov TMP2, qword [TMP1+offset];   // get value from body
        | test TMP2, TMP2;
        | jnz >1;
        | get_vmnull TMP2;
        |1:
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_sp_get_i32: {
        MVMint16 dst    = ins->operands[0].reg.orig;
        MVMint16 obj    = ins->operands[1].reg.orig;
        MVMint16 offset = ins->operands[2].lit_i16;
        | mov TMP1, WORK[obj];            // object
        | mov RV, [TMP1+offset];   // get value from body
        | cdqe;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_sp_deref_bind_i64:
    case MVM_OP_sp_deref_bind_n: {
        MVMint16 obj    = ins->operands[0].reg.orig;
        MVMint16 val    = ins->operands[1].reg.orig;
        MVMint16 offset = ins->operands[2].lit_i16;
        | mov TMP1, WORK[obj];            // object
        | mov TMP2, WORK[val];            // value
        | mov TMP1, qword [TMP1+offset];  // find address of target
        | mov qword [TMP1], TMP2;
        break;
    }
    case MVM_OP_sp_deref_get_i64:
    case MVM_OP_sp_deref_get_n: {
        MVMint16 dst    = ins->operands[0].reg.orig;
        MVMint16 obj    = ins->operands[1].reg.orig;
        MVMint16 offset = ins->operands[2].lit_i16;
        | mov TMP1, WORK[obj];            // object
        | mov TMP3, qword [TMP1+offset];  // get value pointer from body
        | mov TMP2, qword [TMP3];         // deref the pointer
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_sp_p6obind_i:
    case MVM_OP_sp_p6obind_i32:
    case MVM_OP_sp_p6obind_n:
    case MVM_OP_sp_p6obind_s:
    case MVM_OP_sp_p6obind_o: {
        MVMint16 obj    = ins->operands[0].reg.orig;
        MVMint16 offset = ins->operands[1].lit_i16;
        MVMint16 val    = ins->operands[2].reg.orig;
        | mov TMP1, WORK[obj];            // object
        | mov TMP2, WORK[val];            // value
        | lea TMP3, P6OPAQUE:TMP1->body;  // body
        | cmp qword P6OBODY:TMP3->replaced, 0;
        | je >1;
        | mov TMP3, P6OBODY:TMP3->replaced; // replaced object body
        |1:
        if (op == MVM_OP_sp_p6obind_o || op == MVM_OP_sp_p6obind_s) {
            /* check if we should hit write barrier */
            | check_wb TMP1, TMP2, >2;
            | mov qword [rbp-0x28], TMP2; // store value
            | mov qword [rbp-0x30], TMP3; // store body pointer
            | hit_wb WORK[obj], WORK[val];
            | mov TMP3, qword [rbp-0x30]; // restore body pointer
            | mov TMP2, qword [rbp-0x28]; // restore value
            |2: // done
        }
        if (op == MVM_OP_sp_p6obind_i32) {
            | mov dword [TMP3+offset], TMP2d; // store value into body
        }
        else {
            | mov [TMP3+offset], TMP2; // store value into body
        }
        break;
    }
    case MVM_OP_getwhere:
    case MVM_OP_set: {
         MVMint32 reg1 = ins->operands[0].reg.orig;
         MVMint32 reg2 = ins->operands[1].reg.orig;
         | mov TMP1, WORK[reg2];
         | mov WORK[reg1], TMP1;
         break;
    }
    case MVM_OP_sp_getspeshslot: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 spesh_idx = ins->operands[1].lit_i16;
        | get_spesh_slot TMP1, spesh_idx;
        | mov WORK[dst], TMP1;
        break;
    }
    case MVM_OP_setdispatcher: {
        MVMint16 src = ins->operands[0].reg.orig;
        | mov TMP1, aword WORK[src];
        | mov aword TC->cur_dispatcher, TMP1;
        break;
    }
    case MVM_OP_takedispatcher: {
        /* Move tc->cur_dispatcher into the destination register and clear
         * it, but only when it is set and either tc->cur_dispatcher_for is
         * unset or equals the current frame's code_ref. In every other
         * situation the destination receives the value produced by
         * get_vmnull and tc->cur_dispatcher is left untouched. */
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP1, aword TC->cur_dispatcher;
        | cmp TMP1, 0;
        /* no dispatcher set -> null result */
        | je >2;
        | mov TMP2, aword TC->cur_dispatcher_for;
        | cmp TMP2, 0;
        /* dispatcher not tied to a particular code object -> take it */
        | je >1;
        | mov TMP3, TC->cur_frame;
        | mov TMP3, FRAME:TMP3->code_ref;
        | cmp TMP2, TMP3;
        /* dispatcher is tied to a different code object -> null result */
        | jne >2;
        |1:
        /* take: hand over the dispatcher and reset the thread context field */
        | mov aword WORK[dst], TMP1;
        | mov aword TC->cur_dispatcher, NULL;
        | jmp >3;
        |2:
        | get_vmnull TMP1;
        | mov aword WORK[dst], TMP1;
        |3:
        break;
    }
    case MVM_OP_ctx: {
        /* result = MVM_context_from_frame(tc, tc->cur_frame) */
        MVMint16 result = ins->operands[0].reg.orig;
        | mov ARG1, TC;
        | mov ARG2, TC->cur_frame;
        | callp &MVM_context_from_frame;
        | mov WORK[result], RV;
        break;
    }
    case MVM_OP_ctxlexpad: {
        /* Check that the source is not a type object and has the MVMContext
         * REPR id, then copy it unchanged into the destination. Anything
         * else throws. */
        MVMint16 src = ins->operands[1].reg.orig;
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP1, WORK[src];
        | test_type_object TMP1;
        /* type object -> throw */
        | jnz >1;
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMContext;
        | je >2;
        |1:
        | throw_adhoc "ctxlexpad needs an MVMContext";
        |2:
        | mov WORK[dst], TMP1;
        break;
    }
    case MVM_OP_ctxcallerskipthunks: {
        /* dst = MVM_context_apply_traversal(tc, ctx,
         *           MVM_CTX_TRAV_CALLER_SKIP_THUNKS)
         * after validating that ctx is a non-type-object MVMContext. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 ctx = ins->operands[1].reg.orig;
        /* zero RV; NOTE(review): nothing in this snippet reads RV before the
         * call result is stored, so this looks redundant - confirm */
        | xor RV, RV;
        | mov TMP5, aword WORK[ctx];
        /* check concrete instance of type or throw */
        | test_type_object TMP5;
        | jnz >1;
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_MVMContext;
        | jne >1;
        /* Call function to create context. */
        | mov ARG1, TC
        | mov ARG2, TMP5
        | mov ARG3, MVM_CTX_TRAV_CALLER_SKIP_THUNKS
        | callp &MVM_context_apply_traversal;
        | mov WORK[dst], RV;
        | jmp >2;
        |1:
        | throw_adhoc "ctxcallerskipthunks needs an MVMContext";
        |2:
        break;
    }
    case MVM_OP_curcode: {
        /* result = tc->cur_frame->code_ref */
        MVMint16 result = ins->operands[0].reg.orig;
        | mov TMP1, TC->cur_frame;
        | mov TMP1, aword FRAME:TMP1->code_ref;
        | mov aword WORK[result], TMP1;
        break;
    }
    case MVM_OP_getcode: {
        /* result = cu->body.coderefs[coderef index from operand 1] */
        MVMuint16 coderef = ins->operands[1].coderef_idx;
        MVMint16  result  = ins->operands[0].reg.orig;
        | mov TMP1, aword CU->body.coderefs;
        | mov TMP1, aword OBJECTPTR:TMP1[coderef];
        | mov aword WORK[result], TMP1;
        break;
    }
    case MVM_OP_hllboxtype_n:
    case MVM_OP_hllboxtype_s:
    case MVM_OP_hllboxtype_i: {
        /* Fetch the num/str/int box type from the compilation unit's HLL
         * config and store it in the destination register. */
        MVMint16 target = ins->operands[0].reg.orig;
        | mov TMP1, CU->body.hll_config;
        switch (op) {
        case MVM_OP_hllboxtype_n:
            | mov TMP1, aword HLLCONFIG:TMP1->num_box_type;
            break;
        case MVM_OP_hllboxtype_s:
            | mov TMP1, aword HLLCONFIG:TMP1->str_box_type;
            break;
        default: /* MVM_OP_hllboxtype_i */
            | mov TMP1, aword HLLCONFIG:TMP1->int_box_type;
            break;
        }
        | mov aword WORK[target], TMP1;
        break;
    }
    case MVM_OP_null_s: {
        /* Store an all-zero (NULL) value in the destination register. */
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov qword WORK[dst], 0;
        break;
     }
    case MVM_OP_isnull_s: {
        /* dst = 1 if the source register holds zero (NULL), else 0. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        | test TMP1, TMP1;
        | setz TMP2b;
        /* setz only writes the low byte; widen it to the full register */
        | movzx TMP2, TMP2b;
        | mov qword WORK[dst], TMP2;
        break;
    }
    case MVM_OP_captureposarg: {
        /* dst = MVM_args_get_required_pos_obj(tc, capture->body.apc, WORK[pos])
         * after checking that the capture is a non-type-object
         * MVMCallCapture; throws otherwise. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        MVMint16 pos = ins->operands[2].reg.orig;
        | mov TMP5, qword WORK[src];
        | test_type_object TMP5;
        | jnz >1;
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_MVMCallCapture;
        | jne >1;
        | mov ARG1, TC;
        | mov ARG2, aword CAPTURE:TMP5->body.apc;
        | mov ARG3, WORK[pos];
        | callp &MVM_args_get_required_pos_obj;
        | mov WORK[dst], RV;
        | jmp >2;
        |1:
        | throw_adhoc "captureposarg needs a MVMCallCapture";
        |2:
        break;
    }
    case MVM_OP_captureposarg_i: {
        /* Same shape as captureposarg, but fetches the argument through
         * MVM_args_get_required_pos_int. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        MVMint16 pos = ins->operands[2].reg.orig;
        | mov TMP5, qword WORK[src];
        | test_type_object TMP5;
        | jnz >1;
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_MVMCallCapture;
        | jne >1;
        | mov ARG1, TC;
        | mov ARG2, aword CAPTURE:TMP5->body.apc;
        | mov ARG3, qword WORK[pos];
        | callp &MVM_args_get_required_pos_int;
        | mov WORK[dst], RV;
        | jmp >2;
        |1:
        | throw_adhoc "captureposarg_i needs a MVMCallCapture";
        |2:
        break;
    }
    case MVM_OP_captureposarg_n: {
        /* Same shape as captureposarg, but fetches the argument through
         * MVM_args_get_required_pos_num. The result is taken from RV like
         * the integer variants.
         * NOTE(review): a floating point return value would normally arrive
         * in xmm0 rather than RV - confirm the callee's return type. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        MVMint16 pos = ins->operands[2].reg.orig;
        | mov TMP5, qword WORK[src];
        | test_type_object TMP5;
        | jnz >1;
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_MVMCallCapture;
        | jne >1;
        | mov ARG1, TC;
        | mov ARG2, aword CAPTURE:TMP5->body.apc;
        | mov ARG3, qword WORK[pos];
        | callp &MVM_args_get_required_pos_num;
        | mov WORK[dst], RV;
        | jmp >2;
        |1:
        | throw_adhoc "captureposarg_n needs a MVMCallCapture";
        |2:
        break;
    }
    case MVM_OP_captureposarg_s: {
        /* dst = MVM_args_get_required_pos_str(tc, capture->body.apc, WORK[pos])
         * after checking that the capture is a non-type-object
         * MVMCallCapture; throws otherwise. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        MVMint16 pos = ins->operands[2].reg.orig;
        | mov TMP5, qword WORK[src];
        | test_type_object TMP5;
        | jnz >1;
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_MVMCallCapture;
        | jne >1;
        | mov ARG1, TC;
        | mov ARG2, aword CAPTURE:TMP5->body.apc;
        | mov ARG3, aword WORK[pos];
        /* pass the function by address, like every other callp in this file */
        | callp &MVM_args_get_required_pos_str;
        | mov WORK[dst], RV;
        | jmp >2;
        |1:
        | throw_adhoc "captureposarg_s needs a MVMCallCapture";
        |2:
        break;
    }
    case MVM_OP_add_i:
    case MVM_OP_sub_i:
    case MVM_OP_bor_i:
    case MVM_OP_band_i:
    case MVM_OP_bxor_i: {
        /* a = b <op> c for integer add/sub/or/and/xor. One of four
         * instruction shapes is emitted, selected by two questions: is the
         * destination also the left operand, and do the spesh facts supply a
         * value for the right operand that fits_in_32_bit accepts? */
        MVMint32 reg_a = ins->operands[0].reg.orig;
        MVMint32 reg_b = ins->operands[1].reg.orig;
        MVMint32 reg_c = ins->operands[2].reg.orig;

        MVMSpeshFacts *rhs_facts = MVM_spesh_get_facts(tc, jg->sg, ins->operands[2]);
        const int in_place   = (reg_a == reg_b);
        const int rhs_is_imm = (rhs_facts->flags & MVM_SPESH_FACT_KNOWN_VALUE) &&
                               fits_in_32_bit(rhs_facts->value.i);

        if (in_place && rhs_is_imm) {
            /* operate directly on the register slot with an immediate */
            MVMint64 value = rhs_facts->value.i;
            MVM_spesh_graph_add_comment(tc, jg->sg, ins, "JIT optimized to memory/constant operation");
            switch(ins->info->opcode) {
            case MVM_OP_add_i:
                | add qword WORK[reg_a], qword value;
                break;
            case MVM_OP_sub_i:
                | sub qword WORK[reg_a], qword value;
                break;
            case MVM_OP_bor_i:
                | or qword WORK[reg_a], qword value;
                break;
            case MVM_OP_band_i:
                | and qword WORK[reg_a], qword value;
                break;
            case MVM_OP_bxor_i:
                | xor qword WORK[reg_a], qword value;
                break;
            }
        }
        else if (in_place) {
            /* operate directly on the register slot with the right operand
             * loaded into rax */
            MVM_spesh_graph_add_comment(tc, jg->sg, ins, "JIT optimized to memory/register operation");
            | mov rax, WORK[reg_c];
            switch(ins->info->opcode) {
            case MVM_OP_add_i:
                | add WORK[reg_a], rax;
                break;
            case MVM_OP_sub_i:
                | sub WORK[reg_a], rax;
                break;
            case MVM_OP_bor_i:
                | or WORK[reg_a], rax;
                break;
            case MVM_OP_band_i:
                | and WORK[reg_a], rax;
                break;
            case MVM_OP_bxor_i:
                | xor WORK[reg_a], rax;
                break;
            }
        }
        else if (rhs_is_imm) {
            /* load left operand, apply the immediate, store the result */
            MVMint64 value = rhs_facts->value.i;
            MVM_spesh_graph_add_comment(tc, jg->sg, ins, "JIT optimized to register/constant operation");
            | mov rax, WORK[reg_b];
            switch(ins->info->opcode) {
            case MVM_OP_add_i:
                | add rax, qword value;
                break;
            case MVM_OP_sub_i:
                | sub rax, qword value;
                break;
            case MVM_OP_bor_i:
                | or rax, qword value;
                break;
            case MVM_OP_band_i:
                | and rax, qword value;
                break;
            case MVM_OP_bxor_i:
                | xor rax, qword value;
                break;
            }
            | mov WORK[reg_a], rax;
        }
        else {
            /* general form: load left operand, combine with the right
             * operand from its register slot, store the result */
            | mov rax, WORK[reg_b];
            switch(ins->info->opcode) {
            case MVM_OP_add_i:
                | add rax, WORK[reg_c];
                break;
            case MVM_OP_sub_i:
                | sub rax, WORK[reg_c];
                break;
            case MVM_OP_bor_i:
                | or rax, WORK[reg_c];
                break;
            case MVM_OP_band_i:
                | and rax, WORK[reg_c];
                break;
            case MVM_OP_bxor_i:
                | xor rax, WORK[reg_c];
                break;
            }
            | mov WORK[reg_a], rax;
        }
        break;
    }
    case MVM_OP_mul_i:
    case MVM_OP_blshift_i:
    case MVM_OP_brshift_i: {
        /* result = lhs * rhs, lhs << rhs, or lhs >> rhs (arithmetic shift).
         * The shift count is taken from the low byte of the rhs register
         * slot and placed in cl. */
        MVMint32 result = ins->operands[0].reg.orig;
        MVMint32 lhs    = ins->operands[1].reg.orig;
        MVMint32 rhs    = ins->operands[2].reg.orig;
        | mov rax, WORK[lhs];
        if (ins->info->opcode == MVM_OP_mul_i) {
            | imul rax, WORK[rhs];
        }
        else if (ins->info->opcode == MVM_OP_blshift_i) {
            | mov cl, byte WORK[rhs];
            | shl rax, cl;
        }
        else if (ins->info->opcode == MVM_OP_brshift_i) {
            | mov cl, byte WORK[rhs];
            | sar rax, cl;
        }
        | mov WORK[result], rax;
        break;
    }
    case MVM_OP_pow_i: {
        /* Integer power by repeated squaring.
         *   rax = accumulator (result), rcx = remaining exponent,
         *   r8  = current power of the base.
         * A negative exponent skips the loop and yields 0. */
        MVMint16 dst  = ins->operands[0].reg.orig;
        MVMint16 base = ins->operands[1].reg.orig;
        MVMint16 exp  = ins->operands[2].reg.orig;
        | xor rax, rax;
        | mov rcx, WORK[exp];
        | cmp rcx, rax;
        /* exp < 0: result stays 0 */
        | jl >3;
        /* accumulator starts at 1 */
        | inc rax;
        | mov r8, WORK[base];
        |1:
        /* if the low exponent bit is set, multiply the accumulator in */
        | test rcx, 1;
        | jz >2;
        /* one-operand imul: rdx:rax = rax * r8 (rdx is clobbered) */
        | imul r8;
        |2:
        /* square the base and move on to the next exponent bit */
        | imul r8, r8;
        | shr rcx, 1;
        | jnz <1;
        |3:
        | mov WORK[dst], rax;
        break;
    }
    case MVM_OP_div_i: {
        /* Signed division that rounds towards negative infinity: idiv
         * truncates towards zero, so 1 is subtracted from the quotient when
         * the operands have different signs and the remainder is non-zero.
         * Throws on a zero divisor. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 a   = ins->operands[1].reg.orig;
        MVMint16 b   = ins->operands[2].reg.orig;
        | mov rax, WORK[a];
        | mov rcx, WORK[b];
        | cmp rcx, 0;
        | jnz >1;
        | throw_adhoc "Division by zero";
        |1:
        /* either num < 0, or denom < 0, but not both */
        /* the flags read by this setl are still those of `cmp rcx, 0` above;
         * the conditional jump does not modify them */
        | setl dh;
        | cmp rax, 0;
        | setl dl;
        | xor dl, dh;
        | movzx r8d, dl;
        /* sign-extend rax into rdx:rax as required by idiv */
        | cqo;
        | idiv rcx;
        /* rdx now holds the remainder */
        | test rdx, rdx;
        | setnz cl;
        | and r8b, cl;
        /* r8 = bias = (modulo != 0) & ((num < 0) ^ (denom < 0)) */
        | sub rax, r8;
        | mov WORK[dst], rax;
        break;
    }
    case MVM_OP_mod_i: {
        /* dst = remainder of the signed division a / b, exactly as produced
         * by idiv (no sign adjustment is applied). Throws on a zero
         * divisor. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 a   = ins->operands[1].reg.orig;
        MVMint16 b   = ins->operands[2].reg.orig;
        | mov rax, WORK[a];
        | mov rcx, WORK[b];
        | cmp rcx, 0;
        | jnz >1;
        | throw_adhoc "Division by zero";
        |1:
        /* sign-extend rax into rdx:rax as required by idiv */
        | cqo;
        | idiv rcx;
        /* idiv leaves the remainder in rdx */
        | mov WORK[dst], rdx;
        break;
    }
    case MVM_OP_inc_i: {
         /* Add 1 to the register slot in place. */
         MVMint32 reg = ins->operands[0].reg.orig;
         | add qword WORK[reg], 1;
         break;
    }
    case MVM_OP_dec_i: {
        /* Subtract 1 from the register slot in place. */
        MVMint32 reg = ins->operands[0].reg.orig;
        | sub qword WORK[reg], 1;
        break;
    }
    case MVM_OP_bnot_i: {
        /* result = bitwise complement of operand */
        MVMint16 result  = ins->operands[0].reg.orig;
        MVMint16 operand = ins->operands[1].reg.orig;
        | mov TMP1, WORK[operand];
        | not TMP1;
        | mov WORK[result], TMP1;
        break;
    }
    case MVM_OP_neg_i: {
        /* result = two's complement negation of operand */
        MVMint16 result  = ins->operands[0].reg.orig;
        MVMint16 operand = ins->operands[1].reg.orig;
        | mov TMP1, WORK[operand];
        | neg TMP1;
        | mov WORK[result], TMP1;
        break;
    }
    case MVM_OP_extend_i8: {
        /* Sign-extend the low 8 bits of the source to 64 bits. RV is rax,
         * which is the register the cbw/cwde/cdqe chain operates on. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov RV, WORK[src];
        /* al -> ax -> eax -> rax */
        | cbw;
        | cwde;
        | cdqe;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_extend_u8: {
        /* Zero-extend the low 8 bits of the source to 64 bits: clear TMP2,
         * then copy only the low byte into it. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        | xor TMP2, TMP2;
        | mov TMP2b, TMP1b;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_extend_i16: {
        /* Sign-extend the low 16 bits of the source to 64 bits. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov RV, WORK[src];
        /* ax -> eax -> rax */
        | cwde;
        | cdqe;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_extend_u16: {
        /* Zero-extend the low 16 bits of the source to 64 bits: clear TMP2,
         * then copy only the low word into it. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        | xor TMP2, TMP2;
        | mov TMP2w, TMP1w;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_extend_i32: {
        /* Sign-extend the low 32 bits of the source to 64 bits. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov RV, WORK[src];
        /* eax -> rax */
        | cdqe;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_extend_u32: {
        /* Zero-extend the low 32 bits of the source to 64 bits. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        /* writing a 32-bit register clears the upper 32 bits of the full
         * register, so this move performs the zero-extension */
        | mov TMP2d, TMP1d;
        /* store the zero-extended copy (TMP2); TMP1 still carries whatever
         * was in the upper half of the source */
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_trunc_u8:
    case MVM_OP_trunc_i8: {
        /* Keep only the low 8 bits of the source; the rest of the result is
         * zero for both the signed and the unsigned op. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        | xor TMP2, TMP2;
        | mov TMP2b, TMP1b;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_trunc_u16:
    case MVM_OP_trunc_i16: {
        /* Keep only the low 16 bits of the source; the rest of the result
         * is zero for both the signed and the unsigned op. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        | xor TMP2, TMP2;
        | mov TMP2w, TMP1w;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_trunc_u32:
    case MVM_OP_trunc_i32: {
        /* Keep only the low 32 bits of the source; the rest of the result
         * is zero for both the signed and the unsigned op. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        /* x86-64 auto-truncates the upper 32 bits when using registers in 32
         * bit mode */
        | mov TMP2d, TMP1d;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_add_n:
    case MVM_OP_sub_n:
    case MVM_OP_mul_n:
    case MVM_OP_div_n: {
        /* result = lhs <op> rhs on double-precision values, computed in
         * xmm0 with the scalar SSE2 instructions. */
        MVMint16 result = ins->operands[0].reg.orig;
        MVMint16 lhs    = ins->operands[1].reg.orig;
        MVMint16 rhs    = ins->operands[2].reg.orig;
        /* floating point values travel through xmm registers, which need
         * their own move instruction */
        | movsd xmm0, qword WORK[lhs];
        if (ins->info->opcode == MVM_OP_add_n) {
            | addsd xmm0, qword WORK[rhs];
        }
        else if (ins->info->opcode == MVM_OP_sub_n) {
            | subsd xmm0, qword WORK[rhs];
        }
        else if (ins->info->opcode == MVM_OP_mul_n) {
            | mulsd xmm0, qword WORK[rhs];
        }
        else if (ins->info->opcode == MVM_OP_div_n) {
            | divsd xmm0, qword WORK[rhs];
        }
        | movsd qword WORK[result], xmm0;
        break;
    }
    case MVM_OP_coerce_iu:
    case MVM_OP_coerce_ui: {
        /* Signed <-> unsigned coercion is emitted as a plain register copy;
         * the bits are not changed. */
        MVMint16 result  = ins->operands[0].reg.orig;
        MVMint16 operand = ins->operands[1].reg.orig;
        | mov TMP1, WORK[operand];
        | mov WORK[result], TMP1;
        break;
    }
    case MVM_OP_coerce_in: {
        /* dst (double) = (double) src (signed 64-bit integer) */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        /* convert simple integer to double precision */
        | cvtsi2sd xmm0, qword WORK[src];
        | movsd qword WORK[dst], xmm0;
        break;
    }
    case MVM_OP_coerce_ni: {
        /* dst (signed 64-bit integer) = src (double), truncated towards
         * zero (cvttsd2si is the truncating conversion). */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        /* convert double precision to simple integer */
        | cvttsd2si rax, qword WORK[src];
        | mov WORK[dst], rax;
        break;
    }
    case MVM_OP_neg_n: {
        /* Negating a double is flipping its highest bit (bit 63, the sign
         * bit), so this is done with integer instructions. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        /* TMP1 = 1 << 63, the sign-bit mask */
        | mov TMP1, 1;
        | sal TMP1, 63;
        | mov TMP2, qword WORK[src];
        | xor TMP2, TMP1;
        | mov qword WORK[dst], TMP2;
        break;
    }
    case MVM_OP_eq_i:
    case MVM_OP_eqaddr:
    case MVM_OP_ne_i:
    case MVM_OP_lt_i:
    case MVM_OP_le_i:
    case MVM_OP_gt_i:
    case MVM_OP_ge_i: {
        /* result = (lhs <relation> rhs) ? 1 : 0, using signed comparison
         * for the ordering relations. eqaddr shares the eq_i encoding. */
        MVMint32 result = ins->operands[0].reg.orig;
        MVMint32 lhs    = ins->operands[1].reg.orig;
        MVMint32 rhs    = ins->operands[2].reg.orig;
        | mov rax, WORK[lhs];
        /* the comparison leaves its outcome in the flags register */
        | cmp rax, WORK[rhs];
        /* materialise the wanted condition in al */
        if (ins->info->opcode == MVM_OP_eqaddr || ins->info->opcode == MVM_OP_eq_i) {
            | sete al;
        }
        else if (ins->info->opcode == MVM_OP_ne_i) {
            | setne al;
        }
        else if (ins->info->opcode == MVM_OP_lt_i) {
            | setl al;
        }
        else if (ins->info->opcode == MVM_OP_le_i) {
            | setle al;
        }
        else if (ins->info->opcode == MVM_OP_gt_i) {
            | setg al;
        }
        else if (ins->info->opcode == MVM_OP_ge_i) {
            | setge al;
        }
        /* widen al to the whole of rax before storing */
        | movzx rax, al;
        | mov WORK[result], rax;
        break;
    }
    case MVM_OP_cmp_i : {
        /* Three-way signed compare: a = (b > c) - (b < c), i.e. 1, 0 or -1. */
        MVMint32 reg_a = ins->operands[0].reg.orig;
        MVMint32 reg_b = ins->operands[1].reg.orig;
        MVMint32 reg_c = ins->operands[2].reg.orig;
        | mov TMP1, WORK[reg_b];
        /* comparison result in the setting bits in the rflags register */
        | cmp TMP1, WORK[reg_c];
        /* TMP2 = (b > c), TMP3 = (b < c); both setcc read the flags of the
           single cmp above, as movzx does not modify flags */
        | setg TMP2b;
        | movzx TMP2, TMP2b;
        | setl TMP3b;
        | movzx TMP3, TMP3b;
        | sub TMP2, TMP3;
        | mov WORK[reg_a], TMP2;
        break;
    }
    case MVM_OP_gt_s:
    case MVM_OP_ge_s:
    case MVM_OP_lt_s:
    case MVM_OP_le_s: {
        /* src/jit/graph.c already put a call to the MVM_string_compare
           function into the graph, so here we just have to deal with the
           returned integers. */
        /* The compare result is read from, and the boolean written back to,
           the same register (operand 0). A constant is loaded into TMP2 and
           compared against that value:
             gt_s: value == 1      lt_s: value == -1
             ge_s: 0 <= value      le_s: 0 >= value */
        MVMint32 reg = ins->operands[0].reg.orig;
        switch(ins->info->opcode) {
            case MVM_OP_gt_s:
                | mov TMP2, 1;
                break;
            case MVM_OP_lt_s:
                | mov TMP2, -1;
                break;
            case MVM_OP_ge_s: case MVM_OP_le_s:
                | mov TMP2, 0;
                break;
        }
        | cmp TMP2, WORK[reg];
        /* Mind the reversedness of the constant and the value as compared to
         * interp.c */
        switch(ins->info->opcode) {
            case MVM_OP_gt_s:
                | sete al;
                break;
            case MVM_OP_ge_s:
                | setle al;
                break;
            case MVM_OP_lt_s:
                | sete al;
                break;
            case MVM_OP_le_s:
                | setge al;
                break;
        }
        | movzx rax, al;
        | mov WORK[reg], rax;
        break;
    }
    case MVM_OP_not_i: {
        /* Logical not: dst = (src == 0) ? 1 : 0 */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, WORK[src];
        | test TMP1, TMP1;
        | setz TMP2b;
        | movzx TMP2, TMP2b;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_eq_n:
    case MVM_OP_ne_n:
    case MVM_OP_le_n:
    case MVM_OP_lt_n:
    case MVM_OP_ge_n:
    case MVM_OP_gt_n: {
        /* Double-precision comparisons producing 0 or 1 in dst. ucomisd
         * reports an unordered result (a NaN operand) by setting ZF, PF and
         * CF together, which is why the equality ops consult the parity
         * flag and the ordering ops use the "above" conditions. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 a   = ins->operands[1].reg.orig;
        MVMint16 b   = ins->operands[2].reg.orig;
        /* preload the answer for the "not equal" outcome; this must happen
         * before ucomisd only in the sense that mov leaves flags alone */
        if (op == MVM_OP_eq_n) {
            | mov TMP1, 0;
        } else if (op == MVM_OP_ne_n) {
            | mov TMP1, 1;
        }
        /* lt/le are evaluated as gt/ge with the operands swapped */
        if (op == MVM_OP_lt_n || op == MVM_OP_le_n) {
            | movsd xmm0, qword WORK[b];
            | ucomisd xmm0, qword WORK[a];
        } else {
            | movsd xmm0, qword WORK[a];
            | ucomisd xmm0, qword WORK[b];
        }

        if (op == MVM_OP_le_n || op == MVM_OP_ge_n) {
            | setae TMP1b;
        } else if (op == MVM_OP_eq_n) {
            | setnp TMP2b; // zero if either is NaN, 1 otherwise
            | cmove TMP1, TMP2; // if equal, overwrite 0 with 1
        } else if (op == MVM_OP_ne_n) {
            | setp TMP2b; // 1 if either is NaN (in which case they can't be equal)
            | cmove TMP1, TMP2; // if equal, overwrite 1 with IsNan(a) | IsNaN(b)
        } else {
            | seta TMP1b;
        }
        /* only the low byte of TMP1 is meaningful at this point */
        | movzx TMP1, TMP1b;
        | mov WORK[dst], TMP1;
        break;
    }
    case MVM_OP_cmp_n: {
        /* Three-way double compare: dst = (a > b) - (b > a), i.e. 1, 0 or
         * -1. With a NaN operand neither "above" condition holds, so the
         * result is 0. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 a   = ins->operands[1].reg.orig;
        MVMint16 b   = ins->operands[2].reg.orig;
        | movsd xmm0, qword WORK[a];
        | movsd xmm1, qword WORK[b];
        /* rax = (a > b) */
        | ucomisd xmm0, xmm1
        | seta TMP1b;
        | movzx rax, TMP1b;
        /* TMP1 = (b > a) */
        | ucomisd xmm1, xmm0
        | seta TMP1b;
        | movzx TMP1, TMP1b;
        | sub rax, TMP1;
        | mov WORK[dst], rax;
        break;
    }
    case MVM_OP_eq_I:
    case MVM_OP_ne_I:
    case MVM_OP_lt_I:
    case MVM_OP_le_I:
    case MVM_OP_gt_I:
    case MVM_OP_ge_I: {
        /* Big integer comparisons: call MVM_bigint_cmp(tc, lhs, rhs) and
         * turn its MP_LT / MP_EQ / MP_GT answer into 0 or 1. */
        MVMint32 result = ins->operands[0].reg.orig;
        MVMint32 lhs    = ins->operands[1].reg.orig;
        MVMint32 rhs    = ins->operands[2].reg.orig;
        | mov ARG1, TC;
        | mov ARG2, WORK[lhs];
        | mov ARG3, WORK[rhs];
        | callp &MVM_bigint_cmp;
        /* le and ge are expressed as "not greater" and "not less" */
        if (ins->info->opcode == MVM_OP_eq_I) {
            | cmp RV, MP_EQ
            | sete al;
        }
        else if (ins->info->opcode == MVM_OP_ne_I) {
            | cmp RV, MP_EQ
            | setne al;
        }
        else if (ins->info->opcode == MVM_OP_lt_I) {
            | cmp RV, MP_LT
            | sete al;
        }
        else if (ins->info->opcode == MVM_OP_le_I) {
            | cmp RV, MP_GT
            | setne al;
        }
        else if (ins->info->opcode == MVM_OP_gt_I) {
            | cmp RV, MP_GT
            | sete al;
        }
        else if (ins->info->opcode == MVM_OP_ge_I) {
            | cmp RV, MP_LT
            | setne al;
        }
        /* widen al to the whole of rax before storing */
        | movzx rax, al;
        | mov WORK[result], rax;
        break;
    }
    case MVM_OP_isint:
    case MVM_OP_isnum:
    case MVM_OP_isstr:
    case MVM_OP_islist:
    case MVM_OP_ishash: {
        /* dst = 1 when obj is non-NULL and its REPR id equals the id that
         * belongs to the op, otherwise 0. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        MVMint32 reprid;
        switch (op) {
        case MVM_OP_isint:
            reprid = MVM_REPR_ID_P6int;
            break;
        case MVM_OP_isnum:
            reprid = MVM_REPR_ID_P6num;
            break;
        case MVM_OP_isstr:
            reprid = MVM_REPR_ID_P6str;
            break;
        case MVM_OP_islist:
            reprid = MVM_REPR_ID_VMArray;
            break;
        default: /* MVM_OP_ishash */
            reprid = MVM_REPR_ID_MVMHash;
            break;
        }
        | mov TMP1, aword WORK[obj];
        | test TMP1, TMP1;
        | jz >1;
        /* obj->st->REPR->ID */
        | mov TMP1, OBJECT:TMP1->st;
        | mov TMP1, STABLE:TMP1->REPR;
        | cmp qword REPR:TMP1->ID, reprid;
        | jne >1;
        | mov qword WORK[dst], 1;
        | jmp >2;
        |1:
        | mov qword WORK[dst], 0;
        |2:
        break;
    }
    case MVM_OP_sp_boolify_iter_arr: {
        /* dst = (iter->body.array_state.index + 1 < limit) ? 1 : 0 */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[obj];
        | mov TMP2, MVMITER:TMP1->body.array_state.index;
        | add TMP2, 1;
        | mov TMP3, MVMITER:TMP1->body.array_state.limit;
        /* signed comparison: setl is true when index + 1 < limit */
        | cmp TMP2, TMP3;
        | setl TMP1b;
        | movzx TMP1, TMP1b;
        | mov aword WORK[dst], TMP1;
        break;
    }
    case MVM_OP_sp_boolify_iter_hash: {
        /* dst = (iter->body.hash_state.next != 0) ? 1 : 0 */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[obj];
        | mov TMP2, MVMITER:TMP1->body.hash_state.next;
        | test TMP2, TMP2;
        | setnz TMP2b;
        | movzx TMP2, TMP2b;
        | mov aword WORK[dst], TMP2;
        break;
    }
    case MVM_OP_objprimspec: {
        /* dst = boxed_primitive field of the storage spec returned by the
         * type's REPR get_storage_spec function, or 0 for a NULL type. */
        MVMint16 dst  = ins->operands[0].reg.orig;
        MVMint16 type = ins->operands[1].reg.orig;
        | mov TMP6, aword WORK[type];
        | test TMP6, TMP6;
        /* NULL type: TMP6 is zero and is stored as the result */
        | jz >1;
        | mov ARG1, TC;
        | mov ARG2, OBJECT:TMP6->st;
        | mov FUNCTION, STABLE:ARG2->REPR;
        | mov FUNCTION, REPR:FUNCTION->get_storage_spec;
        | call FUNCTION;
        /* RV points at the storage spec */
        | movzx TMP6, word STORAGESPEC:RV->boxed_primitive;
        |1:
        | mov aword WORK[dst], TMP6;
        break;
    }
    case MVM_OP_objprimbits: {
        /* dst = bits field of the type's storage spec when its
         * boxed_primitive field is non-zero; 0 when the type is NULL or
         * boxed_primitive is zero. */
        MVMint16 dst  = ins->operands[0].reg.orig;
        MVMint16 type = ins->operands[1].reg.orig;
        | mov TMP6, aword WORK[type];
        | test TMP6, TMP6;
        /* NULL type: TMP6 is zero and is stored as the result */
        | jz >1;
        | mov ARG2, OBJECT:TMP6->st;
        | mov FUNCTION, STABLE:ARG2->REPR;
        | mov FUNCTION, REPR:FUNCTION->get_storage_spec;
        | mov ARG1, TC;
        | call FUNCTION;
        | movzx TMP6, word STORAGESPEC:RV->boxed_primitive;
        | test TMP6, TMP6;
        /* boxed_primitive is zero: TMP6 is zero and is stored as the result */
        | jz >1;
        | movzx TMP6, word STORAGESPEC:RV->bits;
        |1:
        | mov aword WORK[dst], TMP6;
        break;
    }
    case MVM_OP_objprimunsigned: {
        /* dst = is_unsigned field of the type's storage spec when its
         * boxed_primitive field equals 1; 0 when the type is NULL or
         * boxed_primitive has any other value.
         * NOTE(review): 1 is presumably the integer primitive kind -
         * confirm against the storage spec constants. */
        MVMint16 dst  = ins->operands[0].reg.orig;
        MVMint16 type = ins->operands[1].reg.orig;
        | mov TMP5, aword WORK[type];
        | test TMP5, TMP5;
        | jz >1;
        | mov ARG1, TC;
        | mov ARG2, OBJECT:TMP5->st;
        /* FUNCTION aliases TMP5, which is no longer needed from here on */
        | mov FUNCTION, STABLE:ARG2->REPR;
        | mov FUNCTION, REPR:FUNCTION->get_storage_spec;
        | call FUNCTION;
        | cmp word STORAGESPEC:RV->boxed_primitive, 1;
        | jne >1;
        | movzx TMP6, byte STORAGESPEC:RV->is_unsigned;
        | mov WORK[dst], TMP6;
        | jmp >2;
        |1:
        | mov aword WORK[dst], 0;
        |2:
        break;
    }
    case MVM_OP_isnonnull: {
        /* dst = (obj != NULL) && (obj != value loaded by get_vmnull) */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, WORK[obj];
        | test TMP1, TMP1;
        /* TMP2b = obj is not a NULL pointer */
        | setnz TMP2b;
        | get_vmnull TMP3;
        | cmp TMP1, TMP3;
        /* TMP3b = obj is not the VM null object */
        | setne TMP3b;
        | and TMP2b, TMP3b;
        | movzx TMP2, TMP2b;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_scgethandle: {
        /* dst = MVM_sc_get_handle(tc, sc); throws unless sc has the SCRef
         * REPR id. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 sc  = ins->operands[1].reg.orig;
        | mov TMP5, aword WORK[sc];
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_SCRef;
        | je >1;
        | throw_adhoc "Must provide an SCRef operand to scgethandle"
        |1:
        | mov ARG1, TC;
        | mov ARG2, SCREF:TMP5;
        | callp &MVM_sc_get_handle;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_scobjcount: {
        /* dst = MVM_sc_get_object_count(tc, sc); throws unless sc has the
         * SCRef REPR id. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 sc  = ins->operands[1].reg.orig;
        /* the SCRef is the source operand, not the destination register */
        | mov TMP5, aword WORK[sc];
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_SCRef;
        | je >1;
        | throw_adhoc "Must provide an SCRef operand to scobjcount"
        |1:
        | mov ARG1, TC;
        | mov ARG2, SCREF:TMP5;
        | callp &MVM_sc_get_object_count;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_setobjsc: {
        /* MVM_sc_set_obj_sc(tc, WORK[operand 0], sc); throws unless sc has
         * the SCRef REPR id. Operand 0 is read here, not written, despite
         * the local being called dst. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 sc  = ins->operands[1].reg.orig;
        | mov TMP5, WORK[sc];
        | cmp_repr_id TMP5, TMP6, MVM_REPR_ID_SCRef;
        | je >1;
        | throw_adhoc "Must provide an SCRef operand to setobjsc"
        |1:
        | mov ARG1, TC;
        | mov ARG2, aword WORK[dst];
        | mov ARG3, SCREF:TMP5;
        | callp &MVM_sc_set_obj_sc;
        break;
    }
    case MVM_OP_isnull: {
        /* dst = (obj == NULL) || (obj == value loaded by get_vmnull) */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, WORK[obj];
        | test TMP1, TMP1;
        /* TMP2b = obj is a NULL pointer */
        | setz TMP2b;
        | get_vmnull TMP3;
        | cmp TMP1, TMP3;
        /* TMP3b = obj is the VM null object */
        | sete TMP3b;
        | or TMP2b, TMP3b;
        | movzx TMP2, TMP2b;
        | mov WORK[dst], TMP2;
        break;
    }
    case MVM_OP_sp_fastcreate: {
        /* Emit the allocation sequence via emit_fastcreate, then store RV
         * in the destination register.
         * NOTE(review): presumably emit_fastcreate leaves the new object in
         * RV - confirm against its definition. */
        MVMint16 dst = ins->operands[0].reg.orig;
        emit_fastcreate(tc, compiler, jg, ins);
        | mov aword WORK[dst], RV;
        break;
    }
    case MVM_OP_decont:
    case MVM_OP_sp_decont: {
        /* If src is a concrete object whose STable has a container spec,
         * call that spec's fetch function with (tc, object, &WORK[dst]);
         * otherwise copy src to dst unchanged. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP5, WORK[src];
        | test TMP5, TMP5;
        // obj is null
        | jz >1;
        | test_type_object TMP5;
        // object is type object (not concrete)
        | jnz >1;
        | mov TMP6, OBJECT:TMP5->st;
        | mov TMP6, STABLE:TMP6->container_spec;
        | test TMP6, TMP6;
        // container spec is zero
        | jz >1;
        | mov ARG1, TC;
        | mov ARG2, TMP5;      // object
        | lea ARG3, WORK[dst]; // destination register
        /* FUNCTION aliases TMP5; the object was already copied to ARG2 */
        | mov FUNCTION, CONTAINERSPEC:TMP6->fetch; // get function pointer
        | call FUNCTION;
        | jmp >2;
        |1:
        // otherwise just move the object into the register
        | mov WORK[dst], TMP5;
        |2:
        break;
    }
    case MVM_OP_iscont: {
        /* dst = 1 when obj is non-NULL and obj->st->container_spec is
         * non-NULL, else 0. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[obj];
        | test TMP1, TMP1;
        | jz >1;
        | mov TMP1, OBJECT:TMP1->st;
        | mov TMP1, STABLE:TMP1->container_spec;
        | test TMP1, TMP1;
        |1:
        /* the flags come from whichever `test` ran last: the NULL check on
         * the object (ZF set, result 0) or the check on container_spec */
        | setnz TMP1b;
        | movzx TMP1, TMP1b;
        | mov qword WORK[dst], TMP1;
        break;
    }
    case MVM_OP_sp_findmeth: {
        /* Method lookup with a two-entry cache in the spesh slots: when
         * spesh slot ss_idx equals obj->st, the answer is taken from spesh
         * slot ss_idx + 1. Otherwise MVM_6model_find_method_spesh is called
         * with (tc, obj, name, ss_idx, &WORK[dst]) and is left to fill in
         * the destination register. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        MVMint32 str_idx = ins->operands[2].lit_str_idx;
        MVMuint16 ss_idx = ins->operands[3].lit_i16;
        | get_spesh_slot TMP1, ss_idx;
        | mov TMP2, WORK[obj];
        | mov TMP2, OBJECT:TMP2->st;
        | cmp TMP1, TMP2;
        /* cache miss */
        | jne >1;
        | get_spesh_slot TMP3, ss_idx + 1;
        | mov WORK[dst], TMP3;
        | jmp >2;
        |1:
        /* call find_method_spesh */
        | mov ARG1, TC;
        | mov ARG2, WORK[obj];
        | get_string ARG3, str_idx;
        | mov ARG4, ss_idx;
        /* fifth argument: address of the destination register */
        | lea TMP6, WORK[dst];
        | mov ARG5, TMP6;
        | callp &MVM_6model_find_method_spesh;
        |2:
        break;
    }
    case MVM_OP_isconcrete: {
        /* dst = 1 when obj is non-NULL and not a type object, else 0. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, WORK[obj];
        | test TMP1, TMP1;
        | jz >1;
        | test_type_object TMP1;
        /* type object -> not concrete */
        | jnz >1;
        | mov qword WORK[dst], 1;
        | jmp >2;
        |1:
        | mov qword WORK[dst], 0;
        |2:
        break;
    }
    case MVM_OP_isinvokable: {
        /* dst = 0 only when the STable's invoke function is
         * MVM_6model_invoke_default and it has no invocation_spec;
         * otherwise dst = 1. The source object is dereferenced without a
         * NULL check. */
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 src = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[src];
        | mov TMP1, OBJECT:TMP1->st;
        /* assume invokable until shown otherwise */
        | mov TMP2, 1;
        | mov64 TMP3, ((uintptr_t)(MVM_6model_invoke_default));
        | cmp TMP3, STABLE:TMP1->invoke;
        /* custom invoke function -> invokable */
        | jne >1;
        | mov TMP4, STABLE:TMP1->invocation_spec;
        | test TMP4, TMP4;
        /* has an invocation spec -> invokable */
        | jnz >1;
        /* TMP4 is zero on this path, so the and clears TMP2 */
        | and TMP2, TMP4;
        |1:
        | mov aword WORK[dst], TMP2;
        break;
    }
    case MVM_OP_takehandlerresult: {
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP1, aword TC->last_handler_result;
        | test TMP1, TMP1;
        | jnz >1;
        | get_vmnull TMP1;
        |1:
        | mov aword WORK[dst], TMP1;
        | mov aword TC->last_handler_result, 0;
        break;
    }
    case MVM_OP_exception: {
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP1, TC->active_handlers;
        | test TMP1, TMP1;
        | jz >1;
        | mov TMP1, MVMACTIVEHANDLERS:TMP1->ex_obj;
        | mov WORK[dst], TMP1;
        | jmp >2;
        |1:
        | get_vmnull TMP1;
        | mov WORK[dst], TMP1;
        |2:
        break;
    }
    case MVM_OP_scwbdisable: {
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP1d, dword TC->sc_wb_disable_depth;
        | add TMP1d, 1;
        | mov dword TC->sc_wb_disable_depth, TMP1d;
        | mov qword WORK[dst], TMP1;
        break;
    }
    case MVM_OP_scwbenable: {
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP1d, dword TC->sc_wb_disable_depth; // should do zero-extension
        | sub TMP1d, 1;
        | mov dword TC->sc_wb_disable_depth, TMP1d;
        | mov qword WORK[dst], TMP1;
        break;
    }
    case MVM_OP_assign:
    case MVM_OP_assignunchecked: {
        MVMint16 cont = ins->operands[0].reg.orig;
        MVMint16 obj  = ins->operands[1].reg.orig;
        | mov ARG2, aword WORK[cont];
        | mov FUNCTION, OBJECT:ARG2->st;
        | mov FUNCTION, STABLE:FUNCTION->container_spec;
        | test FUNCTION, FUNCTION;
        | jnz >1;
        | throw_adhoc "Cannot assign to an immutable value";
        |1:
        | mov ARG1, TC;
        | mov ARG3, aword WORK[obj];
        if (op == MVM_OP_assign) {
            | mov FUNCTION, CONTAINERSPEC:FUNCTION->store;
        } else {
            | mov FUNCTION, CONTAINERSPEC:FUNCTION->store_unchecked;
        }
        | call FUNCTION;
        break;
    }
    case MVM_OP_getlexstatic_o:
    case MVM_OP_getlexperinvtype_o: {
        MVMint16 dst  = ins->operands[0].reg.orig;
        MVMint16 name = ins->operands[1].reg.orig;
        | mov ARG1, TC;
        | mov ARG2, aword WORK[name];
        | mov ARG3, MVM_reg_obj;
        | callp &MVM_frame_find_lexical_by_name;
        | test RV, RV;
        | jz >1;
        | mov RV, [RV];
        |1:
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_paramnamesused:
        | mov ARG2, TC->cur_frame;
        | lea ARG2, FRAME:ARG2->params;
        | mov TMP5w, word ARGCTX:ARG2->num_pos;
        | cmp TMP5w, word ARGCTX:ARG2->arg_count;
        | je >1;
        | mov ARG1, TC;
        | callp &MVM_args_assert_nameds_used;
        |1:
        break;
    case MVM_OP_assertparamcheck: {
        MVMint16 ok = ins->operands[0].reg.orig;
        | mov TMP1, qword WORK[ok];
        | test TMP1, TMP1;
        | jnz >1;
        | mov ARG1, TC;
        | callp &MVM_args_bind_failed;
        |1:
        break;
    }
    case MVM_OP_prof_enterspesh:
        | mov ARG1, TC;
        | mov ARG2, TC->cur_frame;
        | mov ARG2, aword FRAME:ARG2->static_info;
        | mov ARG3, aword MVM_PROFILE_ENTER_JIT;
        | callp &MVM_profile_log_enter;
        break;
    case MVM_OP_prof_enterinline: {
        MVMint16 spesh_idx = ins->operands[0].lit_i16;
        | mov ARG1, TC;
        | get_spesh_slot ARG2, spesh_idx;
        | mov ARG3, aword MVM_PROFILE_ENTER_JIT_INLINE;
        | callp &MVM_profile_log_enter;
        break;
    }
    case MVM_OP_prof_replaced: {
        MVMint16 spesh_idx = ins->operands[0].lit_i16;
        | mov ARG1, TC;
        | get_spesh_slot ARG2, spesh_idx;
        | callp &MVM_profile_log_scalar_replaced;
        break;
    }
    case MVM_OP_getobjsc: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[obj];
        | mov TMP2d, dword COLLECTABLE:TMP1->sc_forward_u.sc.sc_idx;
        | xor TMP3, TMP3;
        | cmp TMP2d, 0;
        | jle >1;
        | mov TMP3, aword TC->instance;
        | mov TMP3, aword MVMINSTANCE:TMP3->all_scs;
        | mov TMP3, aword [TMP3 + TMP2d * 8];
        | mov TMP3, SCREFBODY:TMP3->sc;
        |1:
        | mov aword WORK[dst], TMP3;
        break;
    }
    case MVM_OP_invokewithcapture: {
        MVMint16 dest    = ins->operands[0].reg.orig;
        MVMint16 code    = ins->operands[1].reg.orig;
        MVMint16 capture = ins->operands[2].reg.orig;
        | mov TMP1, aword WORK[capture];
        /* if (IS_CONCRETE(capture) && REPR(capture)->ID == MVM_REPR_ID_MVMCallCapture) */
        | test_type_object TMP1;
        | jnz >1;
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMCallCapture;
        | je >2;
        |1:
        /* else throw */
        | throw_adhoc "invokewithcapture needs a MVMCallCapture";
        |2:
        /* code = MVM_frame_find_invokee(tc, code, NULL) */
        | mov ARG1, TC;
        | mov ARG2, WORK[code];
        | xor ARG3, ARG3;
        | callp &MVM_frame_find_invokee;

        | mov ARG2, RV; // ARG2 = code
        /* tc->cur_frame->return_value = &GET_REG(cur_op, 2) */
        | lea ARG1, WORK[dest]; // ARG1 is used as scratch space
        | mov TMP6, aword TC->cur_frame;
        | mov aword FRAME:TMP6->return_value, ARG1;
        /* tc->cur_frame->return_type = MVM_RETURN_OBJ */
        | mov byte FRAME:TMP6->return_type, MVM_RETURN_OBJ;
        /* tc->cur_frame->return_address = cur_op */
        | get_cur_op ARG1;
        | mov aword FRAME:TMP6->return_address, ARG1;
        /* STABLE(code)->invoke(tc, code, capture->body.apc->effective_callsite,
           capture->body.apc->arg) */
        | mov ARG1, TC;
        | mov ARG3, WORK[capture];
        | mov ARG4, aword CAPTURE:ARG3->body.apc;
        | mov ARG3, aword ARGCTX:ARG4->callsite;
        | mov ARG4, aword ARGCTX:ARG4->args;
        | mov FUNCTION, OBJECT:ARG2->st;
        | mov FUNCTION, STABLE:FUNCTION->invoke;
        | call FUNCTION;
        /* jumping out is handled by invokish */
        break;
    }
    case MVM_OP_captureposelems: {
        MVMint16 dest    = ins->operands[0].reg.orig;
        MVMint16 capture = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[capture];
        /* if (IS_CONCRETE(capture) && REPR(capture)->ID == MVM_REPR_ID_MVMCallCapture) */
        | test_type_object TMP1;
        | jnz >1;
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMCallCapture;
        | je >2;
        |1:
        /* else throw */
        | throw_adhoc "captureposelems needs a concrete MVMCallCapture";
        |2:
        | mov TMP2, aword CAPTURE:TMP1->body.apc;
        | movzx TMP2, word ARGPROCCONTEXT:TMP2->num_pos;
        | mov WORK[dest], TMP2;
        break;
    }
    case MVM_OP_captureexistsnamed: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        MVMint16 name = ins->operands[2].reg.orig;
        | mov TMP1, WORK[obj];
        | test_type_object TMP1;
        | jnz >1;
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMCallCapture;
        | je >2;
        |1:
        | throw_adhoc "captureexistsnamed needs a MVMCallCapture";
        |2:
        | mov ARG2, CAPTURE:TMP1->body.apc;
        | mov ARG3, WORK[name];
        | mov ARG1, TC;
        | callp MVM_args_has_named;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_capturehasnameds: {
        MVMint16 dest    = ins->operands[0].reg.orig;
        MVMint16 capture = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[capture];
        /* if (IS_CONCRETE(capture) && REPR(capture)->ID == MVM_REPR_ID_MVMCallCapture) */
        | test_type_object TMP1;
        | jnz >1;
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMCallCapture;
        | je >2;
        |1:
        /* else throw */
        | throw_adhoc "capturehasnameds needs a concrete MVMCallCapture";
        |2:
        | mov TMP2, CAPTURE:TMP1->body.apc;
        | mov TMP3w, word ARGPROCCONTEXT:TMP2->num_pos;
        | mov TMP2w, word ARGPROCCONTEXT:TMP2->arg_count;
        | cmp TMP2w, TMP3w;
        | setne al;
        | movzx rax, al;
        | mov WORK[dest], rax;
        break;
    }
    case MVM_OP_capturenamedshash: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 obj = ins->operands[1].reg.orig;
        | mov TMP1, WORK[obj];
        | test_type_object TMP1;
        | jnz >1;
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMCallCapture;
        | je >2;
        |1:
        | throw_adhoc "capturenamedshash needs a MVMCallCapture";
        |2:
        | mov ARG2, CAPTURE:TMP1->body.apc;
        | mov ARG1, TC;
        | callp MVM_args_slurpy_named;
        | mov WORK[dst], RV;
        break;
    }
    case MVM_OP_getstdin:
    case MVM_OP_getstdout:
    case MVM_OP_getstderr: {
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP3, aword TC->instance;
        if (ins->info->opcode == MVM_OP_getstdin) {
            | mov TMP3, aword MVMINSTANCE:TMP3->stdin_handle;
        } else if (ins->info->opcode == MVM_OP_getstdout) {
            | mov TMP3, aword MVMINSTANCE:TMP3->stdout_handle;
        } else if (ins->info->opcode == MVM_OP_getstderr) {
            | mov TMP3, aword MVMINSTANCE:TMP3->stderr_handle;
        }
        | mov aword WORK[dst], TMP3;
        break;
    }
    case MVM_OP_ordat:
    case MVM_OP_ordbaseat:
    case MVM_OP_ordfirst: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 str = ins->operands[1].reg.orig;
        | mov ARG1, TC;
        | mov ARG2, aword WORK[str];
        if (op == MVM_OP_ordat || op == MVM_OP_ordbaseat) {
            MVMint16 idx = ins->operands[2].reg.orig;
            | mov ARG3, qword WORK[idx];
        } else {
            | mov ARG3, 0;
        }
        if (op == MVM_OP_ordbaseat) {
            | callp &MVM_string_ord_basechar_at;
        } else {
            | callp &MVM_string_ord_at;
        }
        | mov qword WORK[dst], RV;
        break;
    }
    case MVM_OP_getcodename: {
        MVMint16 obj  = ins->operands[0].reg.orig;
        MVMint16 code = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[code];
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMCode;
        | jne >1;
        | test_type_object TMP1;
        | jz >2;
        |1:
        | throw_adhoc "getcodename requires a concrete object";
        |2:
        | mov TMP2, CODE:TMP1->body.name;
        | mov WORK[obj], TMP2;
        break;
    }
    case MVM_OP_setcodeobj: {
        MVMint16 obj  = ins->operands[0].reg.orig;
        MVMint16 code = ins->operands[1].reg.orig;
        | mov TMP1, aword WORK[obj];
        /* if (REPR(obj)->ID == MVM_REPR_ID_MVMCode) */
        | cmp_repr_id TMP1, TMP2, MVM_REPR_ID_MVMCode;
        | je >1;
        /* else throw */
        | throw_adhoc "setcodeobj needs a code ref";
        |1:
        | mov TMP2, aword WORK[code];
        | mov aword CODE:TMP1->body.code_object, TMP2;
        | check_wb TMP1, TMP2, >2;
        | hit_wb TMP1, TMP2
        |2:
        |
        break;
    }
    case MVM_OP_lastexpayload: {
        MVMint16 dst = ins->operands[0].reg.orig;
        | mov TMP3, aword TC->last_payload;
        | mov aword WORK[dst], TMP3;
        break;
    }
    case MVM_OP_param_sp: {
        MVMuint16 dst = ins->operands[0].reg.orig;
        MVMuint16 off = ins->operands[1].lit_ui16;
        | mov ARG1, TC
        | mov ARG2, TC:ARG1->cur_frame
        | lea ARG2, FRAME:ARG2->params
        | mov ARG3, off
        | callp &MVM_args_slurpy_positional;
        | mov qword WORK[dst], RV;
        break;
    }
    case MVM_OP_param_sn: {
        MVMuint16 dst = ins->operands[0].reg.orig;
        | mov ARG1, TC
        | mov ARG2, TC:ARG1->cur_frame
        | xor ARG2, ARG2
        | callp &MVM_args_slurpy_named;
        | mov qword WORK[dst], RV;
        break;
    }
    case MVM_OP_sp_cas_o: {
        MVMint16 target = ins->operands[1].reg.orig;
        MVMint16 expected = ins->operands[2].reg.orig;
        MVMint16 value = ins->operands[3].reg.orig;
        MVMint16 result = ins->operands[0].reg.orig;
        | mov ARG1, TC;
        | mov ARG2, aword WORK[target];
        | mov ARG3, aword WORK[expected];
        | mov ARG4, aword WORK[value];
        |.if WIN32;
        | lea TMP6, WORK[result]
        | mov ARG5, TMP6
        |.else;
        | lea ARG5, WORK[result]
        |.endif
        | mov FUNCTION, OBJECT:ARG2->st;
        | mov FUNCTION, STABLE:FUNCTION->container_spec;
        | mov FUNCTION, CONTAINERSPEC:FUNCTION->cas;
        | call FUNCTION;
        break;
    }
    case MVM_OP_sp_atomicload_o: {
        MVMint16 target = ins->operands[1].reg.orig;
        MVMint16 result = ins->operands[0].reg.orig;
        | mov ARG1, TC;
        | mov ARG2, aword WORK[target];
        | mov FUNCTION, OBJECT:ARG2->st;
        | mov FUNCTION, STABLE:FUNCTION->container_spec;
        | mov FUNCTION, CONTAINERSPEC:FUNCTION->atomic_load;
        | call FUNCTION;
        | mov WORK[result], RV
        break;
    }
    case MVM_OP_sp_atomicstore_o: {
        MVMint16 target = ins->operands[0].reg.orig;
        MVMint16 value = ins->operands[1].reg.orig;
        | mov ARG1, TC;
        | mov ARG2, aword WORK[target];
        | mov ARG3, aword WORK[value];
        | mov FUNCTION, OBJECT:ARG2->st;
        | mov FUNCTION, STABLE:FUNCTION->container_spec;
        | mov FUNCTION, CONTAINERSPEC:FUNCTION->atomic_store;
        | call FUNCTION;
        break;
    }
    case MVM_OP_hllbool: {
        MVMint16 target = ins->operands[0].reg.orig;
        MVMint16 value = ins->operands[1].reg.orig;
        MVMHLLConfig *hll_config = (MVMHLLConfig*)jg->sg->sf->body.cu->body.hll_config;
        uintptr_t  true_value = (uintptr_t)hll_config->true_value;
        uintptr_t false_value = (uintptr_t)hll_config->false_value;
        | mov TMP1, WORK[value];
        | test TMP1, TMP1;
        | jnz >1;
        | mov64 TMP1, false_value;
        | jmp >2;
        |1:
        | mov64 TMP1, true_value;
        |2:
        | mov WORK[target], TMP1;
        break;
    }
    case MVM_OP_hllboolfor: {
        MVMint16 target = ins->operands[0].reg.orig;
        MVMint16 value = ins->operands[1].reg.orig;
        MVMHLLConfig *hll_config = (MVMHLLConfig*)ins->operands[2].lit_i64;
        uintptr_t  true_value = (uintptr_t)hll_config->true_value;
        uintptr_t false_value = (uintptr_t)hll_config->false_value;
        | mov TMP1, WORK[value];
        | test TMP1, TMP1;
        | jnz >1;
        | mov64 TMP1, false_value;
        | jmp >2;
        |1:
        | mov64 TMP1, true_value;
        |2:
        | mov WORK[target], TMP1;
        break;
    }
    case MVM_OP_sp_fastbox_i:
    case MVM_OP_sp_fastbox_i_ic:
    case MVM_OP_sp_fastbox_bi:
    case MVM_OP_sp_fastbox_bi_ic: {
        MVMint32 use_cache = op == MVM_OP_sp_fastbox_i_ic || op == MVM_OP_sp_fastbox_bi_ic;
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 offset = ins->operands[3].lit_i16;
        MVMint16 val = ins->operands[4].reg.orig;
        if (use_cache) {
            MVMObject **cache = tc->instance->int_const_cache->cache[ins->operands[5].lit_i16];
            MVMint16 dst = ins->operands[0].reg.orig;
            | mov TMP1, WORK[val]
            | cmp TMP1, 14
            | jg >1
            | cmp TMP1, -1
            | jl >1
            | inc TMP1
            | mov64 TMP2, (MVMuint64)cache
            | mov TMP2, [TMP2 + TMP1 * 8]
            | mov WORK[dst], TMP2
            | jmp >2
            |1:
        }
        emit_fastcreate(tc, compiler, jg, ins);
        | mov aword WORK[dst], RV;
        | mov TMP3, WORK[val];
        if (op == MVM_OP_sp_fastbox_i || op == MVM_OP_sp_fastbox_i_ic) {
            /* Normal integer */
            | mov qword [RV+offset], TMP3;
        }
        else {
            /* Big integer; if it's in smallint range we poke it in directly,
             * and if not we do a function call to create a bigint. */
            MVMuint16 val_offset = offset + 4;
            | cmp TMP3, qword 2147483647LL
            | jg >3
            | cmp TMP3, qword -2147483648LL
            | jl >3
            | mov dword [RV+offset], MVM_BIGINT_32_FLAG
            | mov dword [RV+val_offset], TMP3d
            | jmp >4
            |3:
            | mov ARG1, RV
            | add ARG1, offset
            | mov ARG2, TMP3
            | callp &MVM_p6bigint_store_as_mp_int;
            |4:
        }
        if (use_cache) {
            |2:
        }
        break;
    }
    case MVM_OP_sp_add_I:
    case MVM_OP_sp_sub_I:
    case MVM_OP_sp_mul_I: {
        MVMint16 a = ins->operands[3].reg.orig;
        MVMint16 b = ins->operands[4].reg.orig;
        MVMint16 c = ins->operands[0].reg.orig;
        MVMint16 offset = ins->operands[5].lit_i16;
        MVMint16 val_offset = offset + 4;
        MVMObject **cache = tc->instance->int_const_cache->cache[ins->operands[6].lit_i16];

        /* See if they're both smallint. */
        | mov TMP1, WORK[a];
        | mov TMP2, WORK[b];
        | mov TMP4d, dword [TMP1 + offset];
        | cmp TMP4d, MVM_BIGINT_32_FLAG;
        | jne >1;
        | mov TMP4d, dword [TMP2 + offset];
        | cmp TMP4d, MVM_BIGINT_32_FLAG;
        | jne >1;

        /* Both smallint, so try to do the addition. If it overflows, fall
         * back to the slow path. */
        | mov TMP4d, dword [TMP1 + val_offset]
        switch (op) {
            case MVM_OP_sp_add_I:
                | add TMP4d, dword [TMP2 + val_offset]
                break;
            case MVM_OP_sp_sub_I:
                | sub TMP4d, dword [TMP2 + val_offset]
                break;
            case MVM_OP_sp_mul_I:
                | imul TMP4d, dword [TMP2 + val_offset]
                break;
        }
        | jo >1

        /* No overflow. See if it's in integer cache range. */
        | cmp TMP4d, 14
        | jg >2
        | cmp TMP4d, -1
        | jl >2
        | inc TMP4d
        | mov64 TMP2, (MVMuint64)cache
        | mov TMP2, [TMP2 + TMP4d * 8]
        | mov WORK[c], TMP2
        | jmp >3
        |2:
        | mov dword [rbp-0x30], TMP4d;
        emit_fastcreate(tc, compiler, jg, ins);
        | mov aword WORK[c], RV;
        | mov TMP4d, dword [rbp-0x30];
        | mov dword [RV + offset], MVM_BIGINT_32_FLAG
        | mov dword [RV + val_offset], TMP4d
        | mov WORK[c], RV
        | jmp >3

        /* The slow path does a function call. Make sure to read the args
         * before storing the result value, in case they're aimed at the
         * same register. */
        |1:
        emit_fastcreate(tc, compiler, jg, ins);
        | mov ARG1, TC;
        | mov ARG2, WORK[a];
        | add ARG2, offset
        | mov ARG3, WORK[b];
        | add ARG3, offset
        | mov aword WORK[c], RV;
        | mov ARG4, RV;
        | add ARG4, offset
        switch (op) {
            case MVM_OP_sp_add_I:
                | callp &MVM_bigint_fallback_add;
                break;
            case MVM_OP_sp_sub_I:
                | callp &MVM_bigint_fallback_sub;
                break;
            case MVM_OP_sp_mul_I:
                | callp &MVM_bigint_fallback_mul;
                break;
        }
        |3:
        break;
    }
    case MVM_OP_sp_bool_I: {
        MVMint16 dst = ins->operands[0].reg.orig;
        MVMint16 a = ins->operands[1].reg.orig;
        MVMint16 offset = ins->operands[2].lit_i16;
        MVMint16 val_offset = offset + 4;
        /* Is this a smallint? */
        | mov TMP1, WORK[a];
        | mov TMP4d, dword [TMP1 + offset];
        | cmp TMP4d, MVM_BIGINT_32_FLAG;
        | jne >1;
        /* Smallbigint. Test the 32bit value for nonzero. */
        | mov TMP4d, dword [TMP1 + val_offset]
        | test TMP4d, TMP4d;
        | setnz TMP3b;
        | movzx TMP3, TMP3b;
        | mov WORK[dst], TMP3;
        | jmp >2
        |1:
        /* Bigint; check a->used for nonzero. */
        | mov TMP4, [TMP1 + offset]
        | mov TMP3d, dword MPINT:TMP4->used;
        | test TMP3d, TMP3d;
        | setnz TMP3b;
        | movzx TMP3, TMP3b;
        | mov WORK[dst], TMP3;
        |2:
        break;
    }
    default:
        MVM_panic(1, "Can't JIT opcode <%s>", ins->info->name);
    }
}



/* Call argument decoder.
 *
 * Emits the code that materialises one call argument, described by `arg`,
 * in the scratch register TMP6. The callers in this file then move TMP6 to
 * its final place with emit_gpr_arg, emit_sse_arg or emit_stack_arg.
 *
 * Floating point arguments are loaded as raw 64-bit values, exactly like
 * integers; no conversion is performed here.
 *
 * The instructions written out here only overwrite TMP6 and, for
 * MVM_JIT_REG_DYNIDX, TMP5. (The get_* macros are defined elsewhere in the
 * file.)
 *
 * An unknown argument type or interpreter variable is reported through
 * MVM_oops, so that we never emit a call with a stale TMP6 as argument. */
static void load_call_arg(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                          MVMJitCallArg arg) {
    switch(arg.type) {
    case MVM_JIT_INTERP_VAR:
        /* one of the interpreter variables */
        switch (arg.v.ivar) {
        case MVM_JIT_INTERP_TC:
            | mov TMP6, TC;
            break;
        case MVM_JIT_INTERP_CU:
            | mov TMP6, CU;
            break;
        case MVM_JIT_INTERP_FRAME:
            | mov TMP6, TC->cur_frame;
            break;
        case MVM_JIT_INTERP_PARAMS:
            /* address of tc->cur_frame->params */
            | mov TMP6, TC->cur_frame;
            | lea TMP6, FRAME:TMP6->params;
            break;
        case MVM_JIT_INTERP_CALLER:
            /* tc->cur_frame->caller */
            | mov TMP6, TC->cur_frame;
            | mov TMP6, aword FRAME:TMP6->caller;
            break;
        default:
            /* Without this, nothing would be emitted and whatever happened
             * to be in TMP6 would silently be passed to the callee. */
            MVM_oops(tc, "JIT: Unknown JIT interpreter variable %d", (int)arg.v.ivar);
        }
        break;
    case MVM_JIT_REG_VAL:
    case MVM_JIT_REG_VAL_F:
        /* value of a local register (integer, pointer or raw double) */
        | mov TMP6, qword WORK[arg.v.reg];
        break;
    case MVM_JIT_REG_ADDR:
        /* address of a local register */
        | lea TMP6, WORK[arg.v.reg];
        break;
    case MVM_JIT_STR_IDX:
        | get_string TMP6, arg.v.lit_i64;
        break;
    case MVM_JIT_LITERAL:
        /* NOTE(review): a plain mov to a 64-bit register takes a
         * sign-extended 32-bit immediate, so this presumably relies on the
         * literal fitting in 32 bits; wider values use mov64 below. */
        | mov TMP6, arg.v.lit_i64;
        break;
    case MVM_JIT_LITERAL_64:
    case MVM_JIT_LITERAL_PTR:
    case MVM_JIT_LITERAL_F:
        /* full 64-bit immediate */
        | mov64 TMP6, arg.v.lit_i64;
        break;
    case MVM_JIT_REG_STABLE:
        /* the ->st field of the object held in a local register */
        | mov TMP6, qword WORK[arg.v.reg];
        | mov TMP6, OBJECT:TMP6->st;
        break;
    case MVM_JIT_REG_OBJBODY:
        /* address of the ->data member of the object in a local register */
        | mov TMP6, qword WORK[arg.v.reg];
        | lea TMP6, STOOGE:TMP6->data;
        break;
    case MVM_JIT_REG_DYNIDX:
        /* The register index is not known at compile time; read it as a
         * 16-bit value relative to cur_op, then load that local register.
         * TMP6 is cleared first because the 16-bit move leaves the upper
         * bits untouched. Clobbers TMP5. */
        | get_cur_op TMP5;
        | xor TMP6, TMP6;
        | mov TMP6w, U16:TMP5[arg.v.reg];
        | mov TMP6, qword [WORK + TMP6*8];
        break;
    case MVM_JIT_DATA_LABEL:
        /* address of a dynamic label */
        | lea TMP6, [=>(arg.v.lit_i64)];
        break;
    case MVM_JIT_ARG_I64:
    case MVM_JIT_ARG_DOUBLE:
        /* tc->cur_frame->args[n], as a raw 64-bit value */
        | mov TMP6, TC->cur_frame;
        | mov TMP6, FRAME:TMP6->args;
        | mov TMP6, qword REGISTER:TMP6[arg.v.lit_i64];
        break;
    case MVM_JIT_ARG_I64_RW:
        /* address of tc->cur_frame->args[n] */
        | mov TMP6, TC->cur_frame;
        | mov TMP6, FRAME:TMP6->args;
        | lea TMP6, qword REGISTER:TMP6[arg.v.lit_i64];
        break;
    case MVM_JIT_ARG_PTR:
        /* ->data of the object in tc->cur_frame->args[n] */
        | mov TMP6, TC->cur_frame;
        | mov TMP6, FRAME:TMP6->args;
        | mov TMP6, qword REGISTER:TMP6[arg.v.lit_i64];
        | mov TMP6, aword STOOGE:TMP6->data;
        break;
    case MVM_JIT_ARG_VMARRAY:
        /* ->body.slots of the array in tc->cur_frame->args[n] */
        | mov TMP6, TC->cur_frame;
        | mov TMP6, FRAME:TMP6->args;
        | mov TMP6, qword REGISTER:TMP6[arg.v.lit_i64];
        | mov TMP6, aword VMARRAY:TMP6->body.slots;
        break;
    case MVM_JIT_PARAM_I64:
    case MVM_JIT_PARAM_DOUBLE:
        /* local register n, as a raw 64-bit value */
        | mov TMP6, qword WORK[arg.v.lit_i64];
        break;
    case MVM_JIT_PARAM_I64_RW:
        /* address of local register n */
        | lea TMP6, qword WORK[arg.v.lit_i64];
        break;
    case MVM_JIT_PARAM_PTR:
        /* ->data of the object in local register n */
        | mov TMP6, aword WORK[arg.v.lit_i64];
        | mov TMP6, aword STOOGE:TMP6->data;
        break;
    case MVM_JIT_PARAM_VMARRAY:
        /* ->body.slots of the array in local register n */
        | mov TMP6, aword WORK[arg.v.lit_i64];
        | mov TMP6, aword VMARRAY:TMP6->body.slots;
        break;
    case MVM_JIT_SPESH_SLOT_VALUE:
        | get_spesh_slot TMP6, arg.v.lit_i64;
        break;
    case MVM_JIT_STACK_VALUE:
        /* 8-byte slot n below rbp-0x28; the same addressing is used when a
         * return value is stored for MVM_JIT_RV_TO_STACK */
        | mov TMP6, [rbp-(0x28+arg.v.lit_i64*8)];
        break;
    default:
        MVM_oops(tc, "JIT: Unknown JIT argument type %d", arg.type);
    }
}

/* Emit the move of TMP6 (as filled by load_call_arg) into the i-th
 * general purpose argument register, counting from 0. ARG1 to ARG4 are
 * always available; ARG5 and ARG6 are only emitted when DynASM is run with
 * POSIX defined. Any other index is reported with MVM_oops. */
static void emit_gpr_arg(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                         MVMint32 i) {
    if (i == 0) {
        | mov ARG1, TMP6;
    }
    else if (i == 1) {
        | mov ARG2, TMP6;
    }
    else if (i == 2) {
        | mov ARG3, TMP6;
    }
    else if (i == 3) {
        | mov ARG4, TMP6;
    }
|.if POSIX
||  else if (i == 4) {
|       mov ARG5, TMP6;
||  }
||  else if (i == 5) {
|       mov ARG6, TMP6;
||  }
|.endif
    else {
        MVM_oops(tc, "JIT: can't store %d arguments in GPR", i);
    }
}

/* Emit the move of the raw 64-bit value in TMP6 (as filled by
 * load_call_arg) into the i-th floating point argument register, counting
 * from 0. ARG1F to ARG4F are always available; ARG5F to ARG8F are only
 * emitted when DynASM is run with POSIX defined. Any other index is
 * reported with MVM_oops. */
static void emit_sse_arg(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                         MVMint32 i) {
    if (i == 0) {
        | movd ARG1F, TMP6;
    }
    else if (i == 1) {
        | movd ARG2F, TMP6;
    }
    else if (i == 2) {
        | movd ARG3F, TMP6;
    }
    else if (i == 3) {
        | movd ARG4F, TMP6;
    }
|.if POSIX
||  else if (i == 4) {
|       movd ARG5F, TMP6;
||  }
||  else if (i == 5) {
|       movd ARG6F, TMP6;
||  }
||  else if (i == 6) {
|       movd ARG7F, TMP6;
||  }
||  else if (i == 7) {
|       movd ARG8F, TMP6;
||  }
|.endif
    else {
        MVM_oops(tc, "JIT: can't put  %d arguments in SSE", i);
    }
}

/* Emit the store of the low arg_size bytes of TMP6 (as filled by
 * load_call_arg) to the outgoing argument area at [rsp + pos].
 *
 * Stack arguments are addressed upwards from the stack top, which is where
 * both the POSIX and the Win64 callers in this file place them.
 *
 * arg_size must be 1, 2, 4 or 8, and the argument must end within the
 * first 160 bytes above rsp; both are checked and reported with MVM_oops.
 * NOTE(review): 160 is presumably the size of the area reserved for
 * outgoing arguments in the JIT frame - confirm against the prologue. */
static void emit_stack_arg(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                           MVMint32 arg_size, MVMint32 pos) {
    MVMint32 arg_end = pos + arg_size;
    if (arg_end > 160) {
        MVM_oops(tc, "JIT: trying to pass arguments "
                 " in local space (stack top offset:"
                 " %d, size: %d)", pos, arg_size);
    }
    if (arg_size == 1) {
        | mov byte [rsp+pos], TMP6b;
    }
    else if (arg_size == 2) {
        | mov word [rsp+pos], TMP6w;
    }
    else if (arg_size == 4) {
        | mov dword [rsp+pos], TMP6d;
    }
    else if (arg_size == 8) {
        | mov qword [rsp+pos], TMP6;
    }
    else {
        MVM_oops(tc, "JIT: can't pass arguments size %d bytes",
                 arg_size);
    }
}

/* Emit the argument setup for a C call using the POSIX register
 * assignment: the first six integer/pointer arguments go to ARG1-ARG6, the
 * first eight floating point arguments to ARG1F-ARG8F, and everything that
 * does not fit is stored in 8-byte slots starting at [rsp].
 *
 * Arguments are classified first and emitted afterwards, grouped per
 * destination (general purpose registers, then floating point registers,
 * then stack), each group in its original left-to-right order. */
static void emit_posix_callargs(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                                MVMJitCallArg args[], MVMint32 num_args) {
    MVMJitCallArg  gpr_args[6], fpr_args[8];
    MVMJitCallArg *stack_args = NULL;
    MVMint32 gpr_count = 0, fpr_count = 0, stack_count = 0;
    MVMint32 n;

    /* With six or fewer arguments nothing can overflow to the stack, as
     * there are at least six registers for either kind. Otherwise at most
     * num_args - 6 arguments can end up there. */
    if (num_args > 6)
        stack_args = MVM_malloc(sizeof(MVMJitCallArg) * (num_args - 6));

    /* Step 1: decide for each argument where it is going to live */
    for (n = 0; n < num_args; n++) {
        MVMint32 is_float = 0;
        switch (args[n].type) {
        case MVM_JIT_ARG_DOUBLE:
        case MVM_JIT_PARAM_DOUBLE:
        case MVM_JIT_REG_VAL_F:
        case MVM_JIT_LITERAL_F:
            is_float = 1;
            break;
        case MVM_JIT_INTERP_VAR:
        case MVM_JIT_REG_VAL:
        case MVM_JIT_REG_ADDR:
        case MVM_JIT_REG_OBJBODY:
        case MVM_JIT_REG_STABLE:
        case MVM_JIT_REG_DYNIDX:
        case MVM_JIT_STR_IDX:
        case MVM_JIT_LITERAL:
        case MVM_JIT_LITERAL_64:
        case MVM_JIT_LITERAL_PTR:
        case MVM_JIT_DATA_LABEL:
        case MVM_JIT_ARG_I64:
        case MVM_JIT_ARG_I64_RW:
        case MVM_JIT_ARG_PTR:
        case MVM_JIT_ARG_VMARRAY:
        case MVM_JIT_PARAM_I64:
        case MVM_JIT_PARAM_I64_RW:
        case MVM_JIT_PARAM_PTR:
        case MVM_JIT_PARAM_VMARRAY:
        case MVM_JIT_SPESH_SLOT_VALUE:
        case MVM_JIT_STACK_VALUE:
            break;
        default:
            MVM_oops(tc, "JIT: Unknown JIT argument type %d for emit_posix_callargs", args[n].type);
            continue;
        }
        if (is_float) {
            if (fpr_count < 8)
                fpr_args[fpr_count++] = args[n];
            else
                stack_args[stack_count++] = args[n];
        }
        else if (gpr_count < 6) {
            gpr_args[gpr_count++] = args[n];
        }
        else {
            stack_args[stack_count++] = args[n];
        }
    }

    /* Step 2: emit the loads, one destination class at a time */
    for (n = 0; n < gpr_count; n++) {
        load_call_arg(tc, compiler, jg, gpr_args[n]);
        emit_gpr_arg(tc, compiler, jg, n);
    }
    for (n = 0; n < fpr_count; n++) {
        load_call_arg(tc, compiler, jg, fpr_args[n]);
        emit_sse_arg(tc, compiler, jg, n);
    }
    /* The k-th stack argument is written to [rsp + k*8], so the leftmost
     * one sits at the stack top, as if the arguments had been pushed
     * right-to-left. */
    for (n = 0; n < stack_count; n++) {
        load_call_arg(tc, compiler, jg, stack_args[n]);
        emit_stack_arg(tc, compiler, jg, 8, n * 8);
    }

    if (stack_args != NULL)
        MVM_free(stack_args);
}

/* Emit the argument setup for a C call using the Win64 register
 * assignment. Registers are assigned by position: argument n (n < 4) goes
 * to either the n-th general purpose or the n-th floating point argument
 * register, depending on its type. Argument n (n >= 4) is stored in the
 * 8-byte stack slot at [rsp + n*8]. */
static void emit_win64_callargs(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                                MVMJitCallArg args[], MVMint32 num_args) {
    MVMint32 n;
    for (n = 0; n < num_args; n++) {
        /* every argument passes through TMP6 first */
        load_call_arg(tc, compiler, jg, args[n]);
        if (n >= 4) {
            emit_stack_arg(tc, compiler, jg, 8, n * 8);
            continue;
        }
        switch (args[n].type) {
        case MVM_JIT_REG_VAL_F:
        case MVM_JIT_LITERAL_F:
        case MVM_JIT_ARG_DOUBLE:
        case MVM_JIT_PARAM_DOUBLE:
            emit_sse_arg(tc, compiler, jg, n);
            break;
        default:
            emit_gpr_arg(tc, compiler, jg, n);
            break;
        }
    }
}

/* Emit a complete call to a C function as described by call_spec:
 *
 *  - the argument setup, using the calling convention selected when this
 *    file is processed by DynASM (WIN32 or not);
 *  - the call to call_spec->func_ptr;
 *  - the handling of the return value, as selected by call_spec->rv_mode
 *    (and, for MVM_JIT_RV_TO_STACK, by call_spec->rv_type).
 *
 * call_spec->rv_idx is, depending on the mode, a local register number, an
 * offset relative to cur_op, or a stack slot number. */
void MVM_jit_emit_call_c(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                         MVMJitCallC * call_spec) {

    |.if WIN32;
    || emit_win64_callargs(tc, compiler, jg, call_spec->args, call_spec->num_args);
    |.else;
    || emit_posix_callargs(tc, compiler, jg, call_spec->args, call_spec->num_args);
    |.endif
    /* Emit the call. I think we should be able to do something smarter than
     * store the constant into the bytecode, like a data segment. But I'm
     * not sure. */
    | callp call_spec->func_ptr;
    /* right, now determine what to do with the return value */
    switch(call_spec->rv_mode) {
    case MVM_JIT_RV_VOID:
        /* nothing to store */
        break;
    case MVM_JIT_RV_INT:
    case MVM_JIT_RV_PTR:
        /* store the integer / pointer result in a local register */
        | mov WORK[call_spec->rv_idx], RV;
        break;
    case MVM_JIT_RV_NUM:
        /* store the floating point result in a local register */
        | movsd qword WORK[call_spec->rv_idx], RVF;
        break;
    case MVM_JIT_RV_DEREF:
        /* the result is a pointer; store what it points to */
        | mov TMP1, [RV];
        | mov WORK[call_spec->rv_idx], TMP1;
        break;
    case MVM_JIT_RV_ADDR:
        /* store local at address */
        | mov TMP1, WORK[call_spec->rv_idx];
        | mov [RV], TMP1;
        break;
    case MVM_JIT_RV_DYNIDX:
        /* store in register relative to cur_op; TMP2 is cleared first
         * because the 16-bit load leaves its upper bits untouched */
        | get_cur_op TMP1;
        | xor TMP2, TMP2;
        | mov TMP2w, word [TMP1 + call_spec->rv_idx*2];
        | mov aword [WORK + TMP2*8], RV;
        break;
    case MVM_JIT_RV_DEREF_OR_VMNULL:
        /* like MVM_JIT_RV_DEREF, but a NULL result yields VMNull */
        | test RV, RV;
        | jz >4;
        | mov TMP1, [RV];
        | jmp >5;
        |4:
        | get_vmnull TMP1;
        |5:
        | mov WORK[call_spec->rv_idx], TMP1;
        break;
    case MVM_JIT_RV_TO_STACK:
        /* Widen the native return value to 64 bits before storing it.
         * Signed types are sign-extended step by step; cbw, cwde and cdqe
         * operate on al/ax/eax/rax implicitly, which is fine because RV is
         * rax. */
        if (call_spec->rv_type == MVM_NATIVECALL_ARG_CHAR) {
        | cbw;
        }
        if (call_spec->rv_type == MVM_NATIVECALL_ARG_CHAR || call_spec->rv_type == MVM_NATIVECALL_ARG_SHORT) {
        | cwde;
        }
        if (call_spec->rv_type == MVM_NATIVECALL_ARG_CHAR || call_spec->rv_type == MVM_NATIVECALL_ARG_SHORT || call_spec->rv_type == MVM_NATIVECALL_ARG_INT) {
        | cdqe;
        }
        /* Unsigned types are zero-extended. */
        if (call_spec->rv_type == MVM_NATIVECALL_ARG_UCHAR) {
        | and RV, 0xFF
        }
        else if (call_spec->rv_type == MVM_NATIVECALL_ARG_USHORT) {
        | and RV, 0xFFFF
        }
        else if (call_spec->rv_type == MVM_NATIVECALL_ARG_UINT ||
                 (call_spec->rv_type == MVM_NATIVECALL_ARG_ULONG && sizeof(long) == 4)) {
        /* A 64-bit 'and' only takes a 32-bit immediate, which the CPU
         * sign-extends, so masking with 0xFFFFFFFF would really mask with
         * all ones and clear nothing. Writing the 32-bit register instead
         * zeroes the upper half of rax. */
        | mov eax, eax
        }
        /* same slot addressing as MVM_JIT_STACK_VALUE in load_call_arg */
        | mov [rbp-(0x28+call_spec->rv_idx*8)], RV;
        break;
    }
}

/* Emit the branch that ends a block.
 *
 * When branch->ins is NULL or a goto, an unconditional jump is emitted,
 * either to the global exit label (branch->dest == MVM_JIT_BRANCH_EXIT) or
 * to the dynamic label branch->dest. For every other supported opcode the
 * matching test is emitted, followed by a jump to the dynamic label that is
 * taken when the condition holds. Panics on an opcode it cannot handle. */
void MVM_jit_emit_block_branch(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                               MVMJitBranch * branch) {
    MVMSpeshIns *ins    = branch->ins;
    MVMint32     target = branch->dest;
    MVMint16     reg;

    /* The gc sync point goes in front of the branch, so as to not have
     * awkward dispatching issues */
    | gc_sync_point;

    /* Unconditional branches need no test at all */
    if (ins == NULL || ins->info->opcode == MVM_OP_goto) {
        if (target == MVM_JIT_BRANCH_EXIT) {
            | jmp ->exit
        } else {
            | jmp =>(target)
        }
        return;
    }

    /* Every conditional tests the register named by its first operand */
    reg = ins->operands[0].reg.orig;
    switch (ins->info->opcode) {
    case MVM_OP_if_i:
        /* branch when the integer is non-zero */
        | mov rax, WORK[reg];
        | test rax, rax;
        | jnz =>(target); // jump to dynamic label
        break;
    case MVM_OP_unless_i:
        /* branch when the integer is zero */
        | mov rax, WORK[reg];
        | test rax, rax;
        | jz =>(target);
        break;
    case MVM_OP_if_n:
        /* branch when the number is NaN or differs from zero */
        | movd xmm0, qword WORK[reg];
        | xorpd xmm1, xmm1; // make it zero
        | ucomisd xmm0, xmm1;
        | jp =>(target);  // is NaN?
        | jne =>(target); // not equal to zero? we're golden
        break;
    case MVM_OP_unless_n:
        /* branch only when the number compares equal to zero; NaN and
         * non-zero values skip over the jump */
        | movd xmm0, qword WORK[reg];
        | xorpd xmm1, xmm1; // make it zero
        | ucomisd xmm0, xmm1;
        | jp >1; // is NaN
        | jne >1; // is not zero
        | jmp =>(target); // it is zero yay!
        |1:
        break;
    case MVM_OP_if_s0:
    case MVM_OP_unless_s0:
        /* truthiness of the string is decided by MVM_coerce_istrue_s */
        | mov ARG1, TC;
        | mov ARG2, WORK[reg];
        | callp &MVM_coerce_istrue_s;
        | test RV, RV;
        if (ins->info->opcode == MVM_OP_unless_s0) {
            | jz =>(target);
        } else {
            | jnz =>(target);
        }
        break;
    case MVM_OP_ifnonnull:
        /* branch unless the value is a NULL pointer or the VMNull object */
        | mov TMP1, WORK[reg];
        | test TMP1, TMP1;
        | jz >1;
        | get_vmnull TMP2;
        | cmp TMP1, TMP2;
        | je >1;
        | jmp =>(target);
        |1:
        break;
    case MVM_OP_if_s:
        /* branch when the string pointer is non-NULL and num_graphs != 0 */
        | mov TMP1, WORK[reg];
        | test TMP1, TMP1;
        | jz >1;
        | cmp dword STRING:TMP1->body.num_graphs, 0;
        | je >1;
        | jmp =>(target);
        |1:
        break;
    case MVM_OP_unless_s:
        /* branch when the string pointer is NULL or num_graphs == 0; the
         * local label below is not referenced by this sequence */
        | mov TMP1, WORK[reg];
        | test TMP1, TMP1;
        | jz =>(target);
        | cmp dword STRING:TMP1->body.num_graphs, 0;
        | je =>(target);
        |1:
        break;
    case MVM_OP_indexat:
    case MVM_OP_indexnat: {
        MVMint16  pos_reg = ins->operands[1].reg.orig;
        MVMuint32 chars   = ins->operands[2].lit_str_idx;
        | mov ARG1, TC;
        | mov ARG2, WORK[reg];
        | mov ARG3, WORK[pos_reg];
        | get_string ARG4, chars;
        | callp &MVM_string_char_at_in_string;
        /* The result is overloaded: it is -2 when the position is out of
         * bounds, which is why indexat uses 'less or equal' rather than
         * 'equal'. The -1 is encoded as a 32 bit immediate, but the 64 bit
         * comparison still works because 32 bit values are sign-extended */
        | cmp RV, -1;
        if (ins->info->opcode == MVM_OP_indexat) {
            | jle =>(target);
        } else {
            | jne =>(target);
        }
        break;
    }
    default:
        MVM_panic(1, "JIT: Can't handle conditional <%s>", ins->info->name);
    }
}

/* Define the dynamic label `label` at the current position of the code being
 * emitted, so that `=>(label)` jumps emitted elsewhere resolve to this
 * point. The tc and jg arguments are not used by this emitter. */
void MVM_jit_emit_label(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                        MVMint32 label) {
    | =>(label):
}

/* Emit an unconditional jump to the dynamic label `label`. */
void MVM_jit_emit_branch(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMint32 label) {
    | jmp =>(label);
}

/* Emit a jump to the dynamic label `label` that is taken when condition
 * `cond` holds for the flags left behind by a preceding comparison.
 *
 * test_type selects between the integer and the floating point reading of
 * the flags (MVM_reg_num32 and MVM_reg_num64 are floating point). For
 * floating point comparisons the parity flag signals an unordered result,
 * which must be checked separately wherever the plain condition code would
 * otherwise be satisfied by it. Aborts on an unknown condition.
 *
 * See the Intel Manual Vol 1 ch7 Table 7-4 'Conditional Jump Instructions',
 * and Table 8-7 'Setting of EFLAG Status Flags for Floating Point Number
 * Comparisons'. */
void MVM_jit_emit_conditional_branch(MVMThreadContext *tc, MVMJitCompiler *compiler,
                                     MVMint32 cond, MVMint32 label, MVMuint8 test_type) {
    if (test_type == MVM_reg_num32 || test_type == MVM_reg_num64) {
        /* floating point flags */
        switch (cond) {
        case MVM_JIT_LT:
            /* CF & !PF */
            | jp >1;
            | jb =>(label);
            |1:
            break;
        case MVM_JIT_LE:
            /* (CF | ZF) & !PF */
            | jp >1;
            | jbe =>(label);
            |1:
            break;
        case MVM_JIT_EQ:
            /* ZF & !PF */
            | jp >1;
            | je =>(label);
            |1:
            break;
        case MVM_JIT_NE:
            /* PF | !ZF */
            | jp =>(label);
            | jne =>(label);
            break;
        case MVM_JIT_GE:
            /* !CF */
            | jae =>(label);
            break;
        case MVM_JIT_GT:
            /* !(CF | ZF) */
            | ja =>(label);
            break;
        case MVM_JIT_NZ:
            /* PF set, Unordered to zero is nonzero */
            | jp =>(label);
            | jnz =>(label);
            break;
        case MVM_JIT_ZR:
            /* zero only if the comparison was ordered */
            | jp >1;
            | jz =>(label);
            |1:
            break;
        default:
            abort();
        }
        return;
    }

    /* integer flags */
    switch (cond) {
    case MVM_JIT_LT:
        /* SF ^ OF */
        | jl =>(label);
        break;
    case MVM_JIT_LE:
        /* (SF ^ OF) | ZF */
        | jle =>(label);
        break;
    case MVM_JIT_EQ:
        /* ZF */
        | je =>(label);
        break;
    case MVM_JIT_NE:
        /* !ZF */
        | jne =>(label);
        break;
    case MVM_JIT_GE:
        /* !(SF ^ OF) */
        | jge =>(label);
        break;
    case MVM_JIT_GT:
        /* !((SF ^ OF) | ZF) */
        | jg =>(label);
        break;
    case MVM_JIT_NZ:
        | jnz =>(label);
        break;
    case MVM_JIT_ZR:
        | jz =>(label);
        break;
    default:
        abort();
    }
}

/* Emit the check for a spesh guard instruction.
 *
 * The guarded value is loaded into TMP1 and, for the guards that compare
 * against one, the spesh slot value into TMP2. A failed check jumps to local
 * label 1, which calls MVM_spesh_deopt_one(tc, guard->deopt_idx) and then
 * leaves through the exit label. A passed check optionally copies the value
 * to the destination register and skips the deopt code via local label 2. */
void MVM_jit_emit_guard(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                        MVMJitGuard *guard) {
    MVMint16 op      = guard->ins->info->opcode;
    /* sp_guardsf has no destination operand, so its operands sit one
     * position earlier than those of the other guards */
    MVMint32 is_sf   = (op == MVM_OP_sp_guardsf);
    MVMint16 checked = guard->ins->operands[is_sf ? 0 : 1].reg.orig;

    | mov TMP1, WORK[checked];
    /* the 'just' guards have nothing to compare against */
    if (op != MVM_OP_sp_guardjustconc && op != MVM_OP_sp_guardjusttype) {
        MVMint16 slot = guard->ins->operands[is_sf ? 1 : 2].lit_i16;
        | get_spesh_slot TMP2, slot;
    }

    switch (op) {
    case MVM_OP_sp_guard:
        /* non-null, and the STABLE must equal the spesh slot value */
        | test TMP1, TMP1;
        | jz >1;
        | cmp TMP2, OBJECT:TMP1->st;
        | jne >1;
        break;
    case MVM_OP_sp_guardtype:
    case MVM_OP_sp_guardjusttype:
        /* non-null type object */
        | test TMP1, TMP1;
        | jz >1;
        | test_type_object TMP1;
        /* zero means this is a concrete object, so deopt */
        | jz >1;
        if (op == MVM_OP_sp_guardtype) {
            /* ... whose STABLE equals the spesh slot value */
            | cmp TMP2, OBJECT:TMP1->st;
            | jne >1;
        }
        break;
    case MVM_OP_sp_guardconc:
    case MVM_OP_sp_guardjustconc:
        /* non-null concrete (non-type) object */
        | test TMP1, TMP1;
        | jz >1;
        | test_type_object TMP1;
        | jnz >1;
        if (op == MVM_OP_sp_guardconc) {
            /* ... whose STABLE equals the spesh slot value */
            | cmp TMP2, OBJECT:TMP1->st;
            | jne >1;
        }
        break;
    case MVM_OP_sp_guardsf:
        /* must be an MVMCode whose static frame is the spesh slot value */
        | cmp_repr_id TMP1, TMP3, MVM_REPR_ID_MVMCode;
        | jne >1;
        | cmp TMP2, CODE:TMP1->body.sf;
        | jne >1;
        break;
    case MVM_OP_sp_guardobj:
    case MVM_OP_sp_guardnotobj:
        /* identity comparison with the spesh slot value */
        | cmp TMP2, TMP1;
        if (op == MVM_OP_sp_guardobj) {
            /* must be the same object */
            | jne >1;
        } else {
            /* must be a different object */
            | je >1;
        }
        break;
    default:
        /* any other opcode gets no check */
        break;
    }

    /* Reaching this point means no jump to the deopt code was taken. The
     * destination of a guard is usually the same real register as its
     * source; when it is not, store the value before skipping the deopt. */
    if (!is_sf) {
        MVMint16 dest = guard->ins->operands[0].reg.orig;
        if (dest != checked) {
            | mov WORK[dest], TMP1
        }
    }
    | jmp >2;
    |1:
    /* deoptimize and leave the compiled code */
    | mov ARG1, TC;
    | mov ARG2, guard->deopt_idx;
    | callp &MVM_spesh_deopt_one;
    | jmp ->exit;
    |2:
}

/* Emit the code for an invocation.
 *
 * The sequence records the interpreter return address in the current frame,
 * (unless this is a resolve op) stores the callsite, return type and return
 * value location in the frame, copies the arguments into the frame's args
 * buffer, and finally calls one of:
 *   - MVM_frame_invoke_code             when invoke->is_fast,
 *   - MVM_spesh_plugin_resolve_jit      when invoke->is_resolve,
 *   - the invoke function of the STable of the object returned by
 *     MVM_frame_find_invokee_multi_ok   otherwise.
 *
 * Register use within the sequence: TMP5 first holds the current frame and
 * later its args buffer; TMP6 holds the callsite (not loaded for resolve
 * ops); TMP2 and TMP4 are scratch. Panics when MVMReturnType is not one byte
 * wide or when an argument instruction is not one of the handled arg ops. */
void MVM_jit_emit_invoke(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg, MVMJitInvoke *invoke) {
    MVMint16 i;
    MVMuint16 callsite_idx = invoke->callsite_idx;

    /* The return address for the interpreter */
    | get_cur_op TMP2;
    | mov TMP5, TC->cur_frame;
    | mov aword FRAME:TMP5->return_address, TMP2;

    /* Unless it's a resolve op (in which case we delay a lot of this)... */
    if (!invoke->is_resolve) {
        /* Store callsite in tmp6, which we use at the end of invoke */
        | mov TMP6, CU->body.callsites;
        | mov TMP6, CALLSITEPTR:TMP6[callsite_idx];

        /* Store callsite in the frame. I use TMP5 as it never conflicts
         * with argument passing (like TMP6, but unlike other TMP regs) */
        | mov FRAME:TMP5->cur_args_callsite, TMP6;

        /* Setup the frame for returning to our current position. The store
         * below is a byte store, so it is only valid for a one-byte
         * MVMReturnType; anything else is refused at emit time. */
        if (sizeof(MVMReturnType) == 1) {
            | mov byte FRAME:TMP5->return_type, invoke->return_type;
        } else {
            MVM_panic(1, "JIT: MVMReturnType has unexpected size");
        }

        /* The register for our return value: NULL for a void return,
         * otherwise the address of the return register in WORK */
        if (invoke->return_type == MVM_RETURN_VOID) {
            | mov aword FRAME:TMP5->return_value, NULL;
        } else {
            | lea TMP2, WORK[invoke->return_register];
            | mov aword FRAME:TMP5->return_value, TMP2;
        }
    }

    /* Install invoke args. From here on TMP5 no longer holds the frame but
     * its args buffer. */
    | mov TMP5, FRAME:TMP5->args;
    for (i = 0;  i < invoke->arg_count; i++) {
        MVMSpeshIns *ins = invoke->arg_ins[i];
        switch (ins->info->opcode) {
        /* register arguments: copy the whole register, whatever its kind */
        case MVM_OP_arg_i:
        case MVM_OP_arg_s:
        case MVM_OP_arg_n:
        case MVM_OP_arg_o: {
            MVMint16 dst = ins->operands[0].lit_i16;
            MVMint16 src = ins->operands[1].reg.orig;
            | mov TMP4, WORK[src];
            | mov REGISTER:TMP5[dst], TMP4;
            break;
        }
        /* constant arguments: the operand is read through lit_i64 for both
         * opcodes and stored as a 64 bit immediate */
        case MVM_OP_argconst_n:
        case MVM_OP_argconst_i: {
            MVMint16 dst = ins->operands[0].lit_i16;
            MVMint64 val = ins->operands[1].lit_i64;
            | mov64 TMP4, val;
            | mov REGISTER:TMP5[dst], TMP4;
            break;
        }
        /* constant string arguments: looked up by string index */
        case MVM_OP_argconst_s: {
            MVMint16 dst = ins->operands[0].lit_i16;
            MVMint32 idx = ins->operands[1].lit_str_idx;
            | get_string TMP4, idx;
            | mov REGISTER:TMP5[dst], TMP4;
            break;
        }
        default:
            MVM_panic(1, "JIT invoke: Can't add arg <%s>",
                      ins->info->name);
        }
    }

    if (invoke->is_fast) {
        /* call MVM_frame_invoke_code */
        | mov ARG1, TC;
        | mov ARG2, WORK[invoke->code_register_or_name];
        | mov ARG3, TMP6; // this is the callsite object
        | mov ARG4, invoke->spesh_cand_or_sf_slot;
        | callp &MVM_frame_invoke_code;
    } else if (invoke->is_resolve) {
        /* call MVM_spesh_plugin_resolve_jit, which will trampoline out of
         * the JIT-compiled code if need be */
        | mov ARG1, TC;
        | get_string ARG2, invoke->code_register_or_name;
        | lea ARG3, WORK[invoke->return_register];
        | mov ARG4, invoke->resolve_offset;
        /* The callsite was not loaded into TMP6 for resolve ops, so it is
         * fetched here. The WIN32 variant goes through rax before storing
         * into ARG5 and ARG6. */
        |.if WIN32
        | get_spesh_slot rax, invoke->spesh_cand_or_sf_slot;
        | mov ARG5, rax;
        | mov rax, CU->body.callsites;
        | mov rax, CALLSITEPTR:rax[callsite_idx];
        | mov ARG6, rax;
        |.else
        | get_spesh_slot ARG5, invoke->spesh_cand_or_sf_slot;
        | mov ARG6, CU->body.callsites;
        | mov ARG6, CALLSITEPTR:ARG6[callsite_idx];
        |.endif
        | callp &MVM_spesh_plugin_resolve_jit;
    } else {
        /* first, save callsite and args in two rbp-relative slots, so that
         * they can be reloaded after the call below and so that the
         * callsite has an address that can be passed */
        | mov qword [rbp-0x28], TMP5; // args
        | mov qword [rbp-0x30], TMP6; // callsite
        /* setup call
         * MVM_frame_find_invokee_multi_ok(tc, code, &cur_callsite, args, NULL); */
        | mov ARG1, TC;
        | mov ARG2, WORK[invoke->code_register_or_name]; // code object
        | lea ARG3, [rbp-0x30];                          // &cur_callsite
        | mov ARG4, TMP5;                                // args
        | mov ARG5, 0;                           // NULL to &was_multi
        | callp &MVM_frame_find_invokee_multi_ok;
        /* restore callsite, args, RV now holds code object. The callsite is
         * reloaded from the slot whose address was passed to the callee. */
        | mov TMP6, [rbp-0x30]; // callsite
        | mov TMP5, [rbp-0x28]; // args
        /* setup args for call to invoke(tc, code, cur_callsite, args) */
        | mov ARG1, TC;
        | mov ARG2, RV;   // code object
        | mov ARG3, TMP6; // callsite
        | mov ARG4, TMP5; // args
        /* get the actual function: the invoke pointer in the STable of the
         * code object */
        | mov FUNCTION, OBJECT:RV->st;
        | mov FUNCTION, STABLE:FUNCTION->invoke;
        | call FUNCTION;
    }
}

/* Emit a computed jump through a table of jumps.
 *
 * The index is read from WORK[jumplist->reg]. An index below zero or not
 * below jumplist->num_labels skips the table entirely and continues at local
 * label 2. Otherwise control transfers to table entry number `index`, which
 * in turn jumps to jumplist->out_labels[index]. Every entry also carries the
 * dynamic label jumplist->in_labels[index]. */
void MVM_jit_emit_jumplist(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg,
                           MVMJitJumpList *jumplist) {
    MVMint32 i;
    /* bounds check (signed) on the index */
    | mov TMP1, WORK[jumplist->reg];
    | cmp TMP1, 0;
    | jl >2;
    | cmp TMP1, jumplist->num_labels;
    | jge >2;
    /* address of the entry = address of the table (local label 1) plus
     * index * 8; this relies on every entry below being padded to 8 bytes */
    | imul TMP1, 0x8; // 8 bytes per goto
    | lea TMP2, [>1];
    | add TMP2, TMP1;
    | jmp TMP2;
    |.align 8;
    |1:
    /* the table itself: one jump per label, each aligned to 8 bytes */
    for (i = 0; i < jumplist->num_labels; i++) {
        |=>(jumplist->in_labels[i]):
        | jmp =>(jumplist->out_labels[i]);
        |.align 8;
    }
    |2:
}

/* Emit the code for a JIT control node.
 *
 * The control type is taken from tile->args[0] when a tile is given and from
 * ctrl->type otherwise, so either `ctrl` or `tile` may be NULL, but not
 * both. The only type handled here is MVM_JIT_CONTROL_BREAKPOINT, which
 * emits an `int 3` debug trap; any other type panics. */
void MVM_jit_emit_control(MVMThreadContext *tc, MVMJitCompiler *compiler,
                          MVMJitControl *ctrl, MVMJitTile *tile) {
    MVMJitControlType type = (tile != NULL ? (MVMJitControlType)tile->args[0] : ctrl->type);
    if (type == MVM_JIT_CONTROL_BREAKPOINT) {
        /* Debug breakpoint */
        | int 3;
    } else if (ctrl != NULL && ctrl->ins != NULL) {
        MVM_panic(1, "Unknown control code: <%s>", ctrl->ins->info->name);
    } else {
        /* When the type came from a tile there may be no control node (or
         * no instruction) to take a name from; report the numeric type
         * rather than dereferencing a NULL pointer while panicking. */
        MVM_panic(1, "Unknown control code: <%d>", (int)type);
    }
}


/* Convenience macros for testing the register class of a register id.
 *
 * IS_GPR(x)  is true when x lies in the inclusive range RAX..R15,
 * IS_FPR(x)  is true when x lies in the inclusive range XMM0..XMM15,
 * REG_NUM(x) keeps only the low four bits of x; the emitters below pass the
 *            result to xmm() to select a floating point register.
 *
 * IS_GPR and IS_FPR evaluate their argument twice, so it should not have
 * side effects. */
#define IS_GPR(x) ((x) >= MVM_JIT_REG(RAX) && (x) <= MVM_JIT_REG(R15))
#define IS_FPR(x) ((x) >= MVM_JIT_REG(XMM0) && (x) <= MVM_JIT_REG(XMM15))
#define REG_NUM(x) ((x) & 0xf)

/* Emit a load of `size` bytes from memory into register reg_dst.
 *
 * The address is mem_src bytes from rbx for MVM_JIT_STORAGE_LOCAL and from
 * rsp for MVM_JIT_STORAGE_STACK. General purpose registers can be loaded
 * with 1, 2, 4 or 8 bytes, floating point registers only with 8. Every other
 * combination of storage class, register class and size aborts. */
void MVM_jit_emit_load(MVMThreadContext *tc, MVMJitCompiler *compiler,
                       MVMint8 reg_dst, MVMJitStorageClass mem_cls, MVMint32 mem_src, MVMint32 size) {
    MVMint8 base_reg;

    if (mem_cls != MVM_JIT_STORAGE_LOCAL && mem_cls != MVM_JIT_STORAGE_STACK)
        abort();
    base_reg = (mem_cls == MVM_JIT_STORAGE_LOCAL) ? MVM_JIT_REG(RBX) : MVM_JIT_REG(RSP);

    if (IS_GPR(reg_dst)) {
        switch (size) {
        case 1:
            | mov Rb(reg_dst), byte [Rq(base_reg)+mem_src];
            break;
        case 2:
            | mov Rw(reg_dst), word [Rq(base_reg)+mem_src];
            break;
        case 4:
            | mov Rd(reg_dst), dword [Rq(base_reg)+mem_src];
            break;
        case 8:
            | mov Rq(reg_dst), qword [Rq(base_reg)+mem_src];
            break;
        default:
            /* no such operand size */
            abort();
        }
    } else if (IS_FPR(reg_dst) && size == 8) {
        MVMint8 xmm_idx = REG_NUM(reg_dst);
        | movsd xmm(xmm_idx), qword [Rq(base_reg)+mem_src];
    } else {
        /* neither a general purpose nor an 8 byte floating point load */
        abort();
    }
}

/* Emit a store of `size` bytes from register reg_src into memory.
 *
 * The address is mem_dst bytes from rbx for MVM_JIT_STORAGE_LOCAL and from
 * rsp for MVM_JIT_STORAGE_STACK. General purpose registers can be stored
 * with 1, 2, 4 or 8 bytes, floating point registers only with 8. Every other
 * combination of storage class, register class and size aborts. */
void MVM_jit_emit_store(MVMThreadContext *tc, MVMJitCompiler *compiler,
                        MVMJitStorageClass mem_cls, MVMint32 mem_dst,
                        MVMint8 reg_src, MVMint32 size) {
    MVMint8 base_reg;

    if (mem_cls != MVM_JIT_STORAGE_LOCAL && mem_cls != MVM_JIT_STORAGE_STACK)
        abort();
    base_reg = (mem_cls == MVM_JIT_STORAGE_LOCAL) ? MVM_JIT_REG(RBX) : MVM_JIT_REG(RSP);

    if (IS_GPR(reg_src)) {
        switch (size) {
        case 1:
            | mov byte [Rq(base_reg)+mem_dst], Rb(reg_src);
            break;
        case 2:
            | mov word [Rq(base_reg)+mem_dst], Rw(reg_src);
            break;
        case 4:
            | mov dword [Rq(base_reg)+mem_dst], Rd(reg_src);
            break;
        case 8:
            | mov qword [Rq(base_reg)+mem_dst], Rq(reg_src);
            break;
        default:
            /* no such operand size */
            abort();
        }
    } else if (IS_FPR(reg_src) && size == 8) {
        MVMint8 xmm_idx = reg_src & 0xf;
        | movsd qword [Rq(base_reg)+mem_dst], xmm(xmm_idx);
    } else {
        /* neither a general purpose nor an 8 byte floating point store */
        abort();
    }
}

/* Emit a register to register copy from src_reg to dst_reg.
 *
 * A register that is not a general purpose register is treated as a
 * floating point register, with one exception that mirrors the order of the
 * tests: when the destination is not general purpose, the source counts as
 * general purpose unless IS_FPR says otherwise. */
void MVM_jit_emit_copy(MVMThreadContext *tc, MVMJitCompiler *compiler,
                       MVMint8 dst_reg, MVMint8 src_reg) {
    MVMint32 dst_is_gpr = IS_GPR(dst_reg);

    if (dst_is_gpr && IS_GPR(src_reg)) {
        /* general purpose to general purpose */
        | mov Rq(dst_reg), Rq(src_reg);
    } else if (dst_is_gpr) {
        /* floating point to general purpose */
        | movd Rq(dst_reg), xmm(REG_NUM(src_reg));
    } else if (IS_FPR(src_reg)) {
        /* floating point to floating point */
        | movq xmm(REG_NUM(dst_reg)), xmm(REG_NUM(src_reg));
    } else {
        /* general purpose to floating point */
        | movd xmm(REG_NUM(dst_reg)), Rq(src_reg);
    }
}


/* Emit `num` consecutive nop instructions; nothing is emitted when num is
 * zero or negative. */
void MVM_jit_emit_marker(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMint32 num) {
    MVMint32 remaining = num;
    while (remaining > 0) {
        | nop;
        remaining--;
    }
}


/* Emit the data->size bytes at data->data into the .data section, preceded
 * by the dynamic label data->label, and switch back to the .code section
 * afterwards. */
void MVM_jit_emit_data(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitData *data) {
    MVMuint8 *payload = data->data;
    size_t    total   = data->size;
    size_t    pos     = 0;
    |.data;
    |=>(data->label):
    /* one .byte directive per byte of payload */
    while (pos < total) {
        |.byte payload[pos];
        pos++;
    }
    |.code
}

/* Emit a check of the current frame's spesh_cand pointer: when it is NULL
 * the emitted code jumps to the exit label, otherwise execution continues
 * with whatever is emitted next. TMP6 is overwritten.
 * NOTE(review): presumably a NULL spesh_cand means the frame was deoptimized
 * while the compiled code was not looking - confirm against the callers. */
void MVM_jit_emit_deopt_check(MVMThreadContext *tc, MVMJitCompiler *compiler) {
    | mov TMP6, TC->cur_frame;
    | mov TMP6, FRAME:TMP6->spesh_cand
    | test TMP6, TMP6
    /* non-NULL: skip the jump to exit */
    | jnz >1
    | jmp ->exit
    |1:
}

/* import tiles */
|.include src/jit/x64/tiles.dasc
